1 //===-- Symtab.cpp --------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <map>
10 #include <set>
11 
12 #include "lldb/Core/Module.h"
13 #include "lldb/Core/RichManglingContext.h"
14 #include "lldb/Core/Section.h"
15 #include "lldb/Symbol/ObjectFile.h"
16 #include "lldb/Symbol/Symbol.h"
17 #include "lldb/Symbol/SymbolContext.h"
18 #include "lldb/Symbol/Symtab.h"
19 #include "lldb/Target/Language.h"
20 #include "lldb/Utility/RegularExpression.h"
21 #include "lldb/Utility/Stream.h"
22 #include "lldb/Utility/Timer.h"
23 
24 #include "llvm/ADT/StringRef.h"
25 
26 using namespace lldb;
27 using namespace lldb_private;
28 
29 Symtab::Symtab(ObjectFile *objfile)
30     : m_objfile(objfile), m_symbols(), m_file_addr_to_index(*this),
31       m_name_to_symbol_indices(), m_mutex(),
32       m_file_addr_to_index_computed(false), m_name_indexes_computed(false) {
33   m_name_to_symbol_indices.emplace(std::make_pair(
34       lldb::eFunctionNameTypeNone, UniqueCStringMap<uint32_t>()));
35   m_name_to_symbol_indices.emplace(std::make_pair(
36       lldb::eFunctionNameTypeBase, UniqueCStringMap<uint32_t>()));
37   m_name_to_symbol_indices.emplace(std::make_pair(
38       lldb::eFunctionNameTypeMethod, UniqueCStringMap<uint32_t>()));
39   m_name_to_symbol_indices.emplace(std::make_pair(
40       lldb::eFunctionNameTypeSelector, UniqueCStringMap<uint32_t>()));
41 }
42 
43 Symtab::~Symtab() {}
44 
45 void Symtab::Reserve(size_t count) {
46   // Clients should grab the mutex from this symbol table and lock it manually
47   // when calling this function to avoid performance issues.
48   m_symbols.reserve(count);
49 }
50 
51 Symbol *Symtab::Resize(size_t count) {
52   // Clients should grab the mutex from this symbol table and lock it manually
53   // when calling this function to avoid performance issues.
54   m_symbols.resize(count);
55   return m_symbols.empty() ? nullptr : &m_symbols[0];
56 }
57 
58 uint32_t Symtab::AddSymbol(const Symbol &symbol) {
59   // Clients should grab the mutex from this symbol table and lock it manually
60   // when calling this function to avoid performance issues.
61   uint32_t symbol_idx = m_symbols.size();
62   auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
63   name_to_index.Clear();
64   m_file_addr_to_index.Clear();
65   m_symbols.push_back(symbol);
66   m_file_addr_to_index_computed = false;
67   m_name_indexes_computed = false;
68   return symbol_idx;
69 }
70 
71 size_t Symtab::GetNumSymbols() const {
72   std::lock_guard<std::recursive_mutex> guard(m_mutex);
73   return m_symbols.size();
74 }
75 
76 void Symtab::SectionFileAddressesChanged() {
77   auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
78   name_to_index.Clear();
79   m_file_addr_to_index_computed = false;
80 }
81 
82 void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order,
83                   Mangled::NamePreference name_preference) {
84   std::lock_guard<std::recursive_mutex> guard(m_mutex);
85 
86   //    s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
87   s->Indent();
88   const FileSpec &file_spec = m_objfile->GetFileSpec();
89   const char *object_name = nullptr;
90   if (m_objfile->GetModule())
91     object_name = m_objfile->GetModule()->GetObjectName().GetCString();
92 
93   if (file_spec)
94     s->Printf("Symtab, file = %s%s%s%s, num_symbols = %" PRIu64,
95               file_spec.GetPath().c_str(), object_name ? "(" : "",
96               object_name ? object_name : "", object_name ? ")" : "",
97               (uint64_t)m_symbols.size());
98   else
99     s->Printf("Symtab, num_symbols = %" PRIu64 "", (uint64_t)m_symbols.size());
100 
101   if (!m_symbols.empty()) {
102     switch (sort_order) {
103     case eSortOrderNone: {
104       s->PutCString(":\n");
105       DumpSymbolHeader(s);
106       const_iterator begin = m_symbols.begin();
107       const_iterator end = m_symbols.end();
108       for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
109         s->Indent();
110         pos->Dump(s, target, std::distance(begin, pos), name_preference);
111       }
112     } break;
113 
114     case eSortOrderByName: {
115       // Although we maintain a lookup by exact name map, the table isn't
116       // sorted by name. So we must make the ordered symbol list up ourselves.
117       s->PutCString(" (sorted by name):\n");
118       DumpSymbolHeader(s);
119 
120       std::multimap<llvm::StringRef, const Symbol *> name_map;
121       for (const_iterator pos = m_symbols.begin(), end = m_symbols.end();
122            pos != end; ++pos) {
123         const char *name = pos->GetName().AsCString();
124         if (name && name[0])
125           name_map.insert(std::make_pair(name, &(*pos)));
126       }
127 
128       for (const auto &name_to_symbol : name_map) {
129         const Symbol *symbol = name_to_symbol.second;
130         s->Indent();
131         symbol->Dump(s, target, symbol - &m_symbols[0], name_preference);
132       }
133     } break;
134 
135     case eSortOrderByAddress:
136       s->PutCString(" (sorted by address):\n");
137       DumpSymbolHeader(s);
138       if (!m_file_addr_to_index_computed)
139         InitAddressIndexes();
140       const size_t num_entries = m_file_addr_to_index.GetSize();
141       for (size_t i = 0; i < num_entries; ++i) {
142         s->Indent();
143         const uint32_t symbol_idx = m_file_addr_to_index.GetEntryRef(i).data;
144         m_symbols[symbol_idx].Dump(s, target, symbol_idx, name_preference);
145       }
146       break;
147     }
148   } else {
149     s->PutCString("\n");
150   }
151 }
152 
153 void Symtab::Dump(Stream *s, Target *target, std::vector<uint32_t> &indexes,
154                   Mangled::NamePreference name_preference) const {
155   std::lock_guard<std::recursive_mutex> guard(m_mutex);
156 
157   const size_t num_symbols = GetNumSymbols();
158   // s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
159   s->Indent();
160   s->Printf("Symtab %" PRIu64 " symbol indexes (%" PRIu64 " symbols total):\n",
161             (uint64_t)indexes.size(), (uint64_t)m_symbols.size());
162   s->IndentMore();
163 
164   if (!indexes.empty()) {
165     std::vector<uint32_t>::const_iterator pos;
166     std::vector<uint32_t>::const_iterator end = indexes.end();
167     DumpSymbolHeader(s);
168     for (pos = indexes.begin(); pos != end; ++pos) {
169       size_t idx = *pos;
170       if (idx < num_symbols) {
171         s->Indent();
172         m_symbols[idx].Dump(s, target, idx, name_preference);
173       }
174     }
175   }
176   s->IndentLess();
177 }
178 
179 void Symtab::DumpSymbolHeader(Stream *s) {
180   s->Indent("               Debug symbol\n");
181   s->Indent("               |Synthetic symbol\n");
182   s->Indent("               ||Externally Visible\n");
183   s->Indent("               |||\n");
184   s->Indent("Index   UserID DSX Type            File Address/Value Load "
185             "Address       Size               Flags      Name\n");
186   s->Indent("------- ------ --- --------------- ------------------ "
187             "------------------ ------------------ ---------- "
188             "----------------------------------\n");
189 }
190 
191 static int CompareSymbolID(const void *key, const void *p) {
192   const user_id_t match_uid = *(const user_id_t *)key;
193   const user_id_t symbol_uid = ((const Symbol *)p)->GetID();
194   if (match_uid < symbol_uid)
195     return -1;
196   if (match_uid > symbol_uid)
197     return 1;
198   return 0;
199 }
200 
201 Symbol *Symtab::FindSymbolByID(lldb::user_id_t symbol_uid) const {
202   std::lock_guard<std::recursive_mutex> guard(m_mutex);
203 
204   Symbol *symbol =
205       (Symbol *)::bsearch(&symbol_uid, &m_symbols[0], m_symbols.size(),
206                           sizeof(m_symbols[0]), CompareSymbolID);
207   return symbol;
208 }
209 
210 Symbol *Symtab::SymbolAtIndex(size_t idx) {
211   // Clients should grab the mutex from this symbol table and lock it manually
212   // when calling this function to avoid performance issues.
213   if (idx < m_symbols.size())
214     return &m_symbols[idx];
215   return nullptr;
216 }
217 
218 const Symbol *Symtab::SymbolAtIndex(size_t idx) const {
219   // Clients should grab the mutex from this symbol table and lock it manually
220   // when calling this function to avoid performance issues.
221   if (idx < m_symbols.size())
222     return &m_symbols[idx];
223   return nullptr;
224 }
225 
226 static bool lldb_skip_name(llvm::StringRef mangled,
227                            Mangled::ManglingScheme scheme) {
228   switch (scheme) {
229   case Mangled::eManglingSchemeItanium: {
230     if (mangled.size() < 3 || !mangled.startswith("_Z"))
231       return true;
232 
233     // Avoid the following types of symbols in the index.
234     switch (mangled[2]) {
235     case 'G': // guard variables
236     case 'T': // virtual tables, VTT structures, typeinfo structures + names
237     case 'Z': // named local entities (if we eventually handle
238               // eSymbolTypeData, we will want this back)
239       return true;
240 
241     default:
242       break;
243     }
244 
245     // Include this name in the index.
246     return false;
247   }
248 
249   // No filters for this scheme yet. Include all names in indexing.
250   case Mangled::eManglingSchemeMSVC:
251     return false;
252 
253   // No filters for this scheme yet. Include all names in indexing.
254   case Mangled::eManglingSchemeRustV0:
255     return false;
256 
257   // Don't try and demangle things we can't categorize.
258   case Mangled::eManglingSchemeNone:
259     return true;
260   }
261   llvm_unreachable("unknown scheme!");
262 }
263 
264 void Symtab::InitNameIndexes() {
265   // Protected function, no need to lock mutex...
266   if (!m_name_indexes_computed) {
267     m_name_indexes_computed = true;
268     LLDB_SCOPED_TIMER();
269 
270     // Collect all loaded language plugins.
271     std::vector<Language *> languages;
272     Language::ForEach([&languages](Language *l) {
273       languages.push_back(l);
274       return true;
275     });
276 
277     auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
278     auto &basename_to_index =
279         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
280     auto &method_to_index =
281         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
282     auto &selector_to_index =
283         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeSelector);
284     // Create the name index vector to be able to quickly search by name
285     const size_t num_symbols = m_symbols.size();
286     name_to_index.Reserve(num_symbols);
287 
288     // The "const char *" in "class_contexts" and backlog::value_type::second
289     // must come from a ConstString::GetCString()
290     std::set<const char *> class_contexts;
291     std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
292     backlog.reserve(num_symbols / 2);
293 
294     // Instantiation of the demangler is expensive, so better use a single one
295     // for all entries during batch processing.
296     RichManglingContext rmc;
297     for (uint32_t value = 0; value < num_symbols; ++value) {
298       Symbol *symbol = &m_symbols[value];
299 
300       // Don't let trampolines get into the lookup by name map If we ever need
301       // the trampoline symbols to be searchable by name we can remove this and
302       // then possibly add a new bool to any of the Symtab functions that
303       // lookup symbols by name to indicate if they want trampolines.
304       if (symbol->IsTrampoline())
305         continue;
306 
307       // If the symbol's name string matched a Mangled::ManglingScheme, it is
308       // stored in the mangled field.
309       Mangled &mangled = symbol->GetMangled();
310       if (ConstString name = mangled.GetMangledName()) {
311         name_to_index.Append(name, value);
312 
313         if (symbol->ContainsLinkerAnnotations()) {
314           // If the symbol has linker annotations, also add the version without
315           // the annotations.
316           ConstString stripped = ConstString(
317               m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
318           name_to_index.Append(stripped, value);
319         }
320 
321         const SymbolType type = symbol->GetType();
322         if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
323           if (mangled.DemangleWithRichManglingInfo(rmc, lldb_skip_name))
324             RegisterMangledNameEntry(value, class_contexts, backlog, rmc);
325         }
326       }
327 
328       // Symbol name strings that didn't match a Mangled::ManglingScheme, are
329       // stored in the demangled field.
330       if (ConstString name = mangled.GetDemangledName()) {
331         name_to_index.Append(name, value);
332 
333         if (symbol->ContainsLinkerAnnotations()) {
334           // If the symbol has linker annotations, also add the version without
335           // the annotations.
336           name = ConstString(
337               m_objfile->StripLinkerSymbolAnnotations(name.GetStringRef()));
338           name_to_index.Append(name, value);
339         }
340 
341         // If the demangled name turns out to be an ObjC name, and is a category
342         // name, add the version without categories to the index too.
343         for (Language *lang : languages) {
344           for (auto variant : lang->GetMethodNameVariants(name)) {
345             if (variant.GetType() & lldb::eFunctionNameTypeSelector)
346               selector_to_index.Append(variant.GetName(), value);
347             else if (variant.GetType() & lldb::eFunctionNameTypeFull)
348               name_to_index.Append(variant.GetName(), value);
349             else if (variant.GetType() & lldb::eFunctionNameTypeMethod)
350               method_to_index.Append(variant.GetName(), value);
351             else if (variant.GetType() & lldb::eFunctionNameTypeBase)
352               basename_to_index.Append(variant.GetName(), value);
353           }
354         }
355       }
356     }
357 
358     for (const auto &record : backlog) {
359       RegisterBacklogEntry(record.first, record.second, class_contexts);
360     }
361 
362     name_to_index.Sort();
363     name_to_index.SizeToFit();
364     selector_to_index.Sort();
365     selector_to_index.SizeToFit();
366     basename_to_index.Sort();
367     basename_to_index.SizeToFit();
368     method_to_index.Sort();
369     method_to_index.SizeToFit();
370   }
371 }
372 
373 void Symtab::RegisterMangledNameEntry(
374     uint32_t value, std::set<const char *> &class_contexts,
375     std::vector<std::pair<NameToIndexMap::Entry, const char *>> &backlog,
376     RichManglingContext &rmc) {
377   // Only register functions that have a base name.
378   rmc.ParseFunctionBaseName();
379   llvm::StringRef base_name = rmc.GetBufferRef();
380   if (base_name.empty())
381     return;
382 
383   // The base name will be our entry's name.
384   NameToIndexMap::Entry entry(ConstString(base_name), value);
385 
386   rmc.ParseFunctionDeclContextName();
387   llvm::StringRef decl_context = rmc.GetBufferRef();
388 
389   // Register functions with no context.
390   if (decl_context.empty()) {
391     // This has to be a basename
392     auto &basename_to_index =
393         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
394     basename_to_index.Append(entry);
395     // If there is no context (no namespaces or class scopes that come before
396     // the function name) then this also could be a fullname.
397     auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
398     name_to_index.Append(entry);
399     return;
400   }
401 
402   // Make sure we have a pool-string pointer and see if we already know the
403   // context name.
404   const char *decl_context_ccstr = ConstString(decl_context).GetCString();
405   auto it = class_contexts.find(decl_context_ccstr);
406 
407   auto &method_to_index =
408       GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
409   // Register constructors and destructors. They are methods and create
410   // declaration contexts.
411   if (rmc.IsCtorOrDtor()) {
412     method_to_index.Append(entry);
413     if (it == class_contexts.end())
414       class_contexts.insert(it, decl_context_ccstr);
415     return;
416   }
417 
418   // Register regular methods with a known declaration context.
419   if (it != class_contexts.end()) {
420     method_to_index.Append(entry);
421     return;
422   }
423 
424   // Regular methods in unknown declaration contexts are put to the backlog. We
425   // will revisit them once we processed all remaining symbols.
426   backlog.push_back(std::make_pair(entry, decl_context_ccstr));
427 }
428 
429 void Symtab::RegisterBacklogEntry(
430     const NameToIndexMap::Entry &entry, const char *decl_context,
431     const std::set<const char *> &class_contexts) {
432   auto &method_to_index =
433       GetNameToSymbolIndexMap(lldb::eFunctionNameTypeMethod);
434   auto it = class_contexts.find(decl_context);
435   if (it != class_contexts.end()) {
436     method_to_index.Append(entry);
437   } else {
438     // If we got here, we have something that had a context (was inside
439     // a namespace or class) yet we don't know the entry
440     method_to_index.Append(entry);
441     auto &basename_to_index =
442         GetNameToSymbolIndexMap(lldb::eFunctionNameTypeBase);
443     basename_to_index.Append(entry);
444   }
445 }
446 
447 void Symtab::PreloadSymbols() {
448   std::lock_guard<std::recursive_mutex> guard(m_mutex);
449   InitNameIndexes();
450 }
451 
452 void Symtab::AppendSymbolNamesToMap(const IndexCollection &indexes,
453                                     bool add_demangled, bool add_mangled,
454                                     NameToIndexMap &name_to_index_map) const {
455   LLDB_SCOPED_TIMER();
456   if (add_demangled || add_mangled) {
457     std::lock_guard<std::recursive_mutex> guard(m_mutex);
458 
459     // Create the name index vector to be able to quickly search by name
460     const size_t num_indexes = indexes.size();
461     for (size_t i = 0; i < num_indexes; ++i) {
462       uint32_t value = indexes[i];
463       assert(i < m_symbols.size());
464       const Symbol *symbol = &m_symbols[value];
465 
466       const Mangled &mangled = symbol->GetMangled();
467       if (add_demangled) {
468         if (ConstString name = mangled.GetDemangledName())
469           name_to_index_map.Append(name, value);
470       }
471 
472       if (add_mangled) {
473         if (ConstString name = mangled.GetMangledName())
474           name_to_index_map.Append(name, value);
475       }
476     }
477   }
478 }
479 
480 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
481                                              std::vector<uint32_t> &indexes,
482                                              uint32_t start_idx,
483                                              uint32_t end_index) const {
484   std::lock_guard<std::recursive_mutex> guard(m_mutex);
485 
486   uint32_t prev_size = indexes.size();
487 
488   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
489 
490   for (uint32_t i = start_idx; i < count; ++i) {
491     if (symbol_type == eSymbolTypeAny || m_symbols[i].GetType() == symbol_type)
492       indexes.push_back(i);
493   }
494 
495   return indexes.size() - prev_size;
496 }
497 
498 uint32_t Symtab::AppendSymbolIndexesWithTypeAndFlagsValue(
499     SymbolType symbol_type, uint32_t flags_value,
500     std::vector<uint32_t> &indexes, uint32_t start_idx,
501     uint32_t end_index) const {
502   std::lock_guard<std::recursive_mutex> guard(m_mutex);
503 
504   uint32_t prev_size = indexes.size();
505 
506   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
507 
508   for (uint32_t i = start_idx; i < count; ++i) {
509     if ((symbol_type == eSymbolTypeAny ||
510          m_symbols[i].GetType() == symbol_type) &&
511         m_symbols[i].GetFlags() == flags_value)
512       indexes.push_back(i);
513   }
514 
515   return indexes.size() - prev_size;
516 }
517 
518 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
519                                              Debug symbol_debug_type,
520                                              Visibility symbol_visibility,
521                                              std::vector<uint32_t> &indexes,
522                                              uint32_t start_idx,
523                                              uint32_t end_index) const {
524   std::lock_guard<std::recursive_mutex> guard(m_mutex);
525 
526   uint32_t prev_size = indexes.size();
527 
528   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
529 
530   for (uint32_t i = start_idx; i < count; ++i) {
531     if (symbol_type == eSymbolTypeAny ||
532         m_symbols[i].GetType() == symbol_type) {
533       if (CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
534         indexes.push_back(i);
535     }
536   }
537 
538   return indexes.size() - prev_size;
539 }
540 
541 uint32_t Symtab::GetIndexForSymbol(const Symbol *symbol) const {
542   if (!m_symbols.empty()) {
543     const Symbol *first_symbol = &m_symbols[0];
544     if (symbol >= first_symbol && symbol < first_symbol + m_symbols.size())
545       return symbol - first_symbol;
546   }
547   return UINT32_MAX;
548 }
549 
550 struct SymbolSortInfo {
551   const bool sort_by_load_addr;
552   const Symbol *symbols;
553 };
554 
555 namespace {
556 struct SymbolIndexComparator {
557   const std::vector<Symbol> &symbols;
558   std::vector<lldb::addr_t> &addr_cache;
559 
560   // Getting from the symbol to the Address to the File Address involves some
561   // work. Since there are potentially many symbols here, and we're using this
562   // for sorting so we're going to be computing the address many times, cache
563   // that in addr_cache. The array passed in has to be the same size as the
564   // symbols array passed into the member variable symbols, and should be
565   // initialized with LLDB_INVALID_ADDRESS.
566   // NOTE: You have to make addr_cache externally and pass it in because
567   // std::stable_sort
568   // makes copies of the comparator it is initially passed in, and you end up
569   // spending huge amounts of time copying this array...
570 
571   SymbolIndexComparator(const std::vector<Symbol> &s,
572                         std::vector<lldb::addr_t> &a)
573       : symbols(s), addr_cache(a) {
574     assert(symbols.size() == addr_cache.size());
575   }
576   bool operator()(uint32_t index_a, uint32_t index_b) {
577     addr_t value_a = addr_cache[index_a];
578     if (value_a == LLDB_INVALID_ADDRESS) {
579       value_a = symbols[index_a].GetAddressRef().GetFileAddress();
580       addr_cache[index_a] = value_a;
581     }
582 
583     addr_t value_b = addr_cache[index_b];
584     if (value_b == LLDB_INVALID_ADDRESS) {
585       value_b = symbols[index_b].GetAddressRef().GetFileAddress();
586       addr_cache[index_b] = value_b;
587     }
588 
589     if (value_a == value_b) {
590       // The if the values are equal, use the original symbol user ID
591       lldb::user_id_t uid_a = symbols[index_a].GetID();
592       lldb::user_id_t uid_b = symbols[index_b].GetID();
593       if (uid_a < uid_b)
594         return true;
595       if (uid_a > uid_b)
596         return false;
597       return false;
598     } else if (value_a < value_b)
599       return true;
600 
601     return false;
602   }
603 };
604 }
605 
606 void Symtab::SortSymbolIndexesByValue(std::vector<uint32_t> &indexes,
607                                       bool remove_duplicates) const {
608   std::lock_guard<std::recursive_mutex> guard(m_mutex);
609   LLDB_SCOPED_TIMER();
610   // No need to sort if we have zero or one items...
611   if (indexes.size() <= 1)
612     return;
613 
614   // Sort the indexes in place using std::stable_sort.
615   // NOTE: The use of std::stable_sort instead of llvm::sort here is strictly
616   // for performance, not correctness.  The indexes vector tends to be "close"
617   // to sorted, which the stable sort handles better.
618 
619   std::vector<lldb::addr_t> addr_cache(m_symbols.size(), LLDB_INVALID_ADDRESS);
620 
621   SymbolIndexComparator comparator(m_symbols, addr_cache);
622   std::stable_sort(indexes.begin(), indexes.end(), comparator);
623 
624   // Remove any duplicates if requested
625   if (remove_duplicates) {
626     auto last = std::unique(indexes.begin(), indexes.end());
627     indexes.erase(last, indexes.end());
628   }
629 }
630 
631 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
632                                              std::vector<uint32_t> &indexes) {
633   std::lock_guard<std::recursive_mutex> guard(m_mutex);
634 
635   LLDB_SCOPED_TIMER();
636   if (symbol_name) {
637     if (!m_name_indexes_computed)
638       InitNameIndexes();
639 
640     auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
641     return name_to_index.GetValues(symbol_name, indexes);
642   }
643   return 0;
644 }
645 
646 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
647                                              Debug symbol_debug_type,
648                                              Visibility symbol_visibility,
649                                              std::vector<uint32_t> &indexes) {
650   std::lock_guard<std::recursive_mutex> guard(m_mutex);
651 
652   LLDB_SCOPED_TIMER();
653   if (symbol_name) {
654     const size_t old_size = indexes.size();
655     if (!m_name_indexes_computed)
656       InitNameIndexes();
657 
658     auto &name_to_index = GetNameToSymbolIndexMap(lldb::eFunctionNameTypeNone);
659     std::vector<uint32_t> all_name_indexes;
660     const size_t name_match_count =
661         name_to_index.GetValues(symbol_name, all_name_indexes);
662     for (size_t i = 0; i < name_match_count; ++i) {
663       if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type,
664                              symbol_visibility))
665         indexes.push_back(all_name_indexes[i]);
666     }
667     return indexes.size() - old_size;
668   }
669   return 0;
670 }
671 
672 uint32_t
673 Symtab::AppendSymbolIndexesWithNameAndType(ConstString symbol_name,
674                                            SymbolType symbol_type,
675                                            std::vector<uint32_t> &indexes) {
676   std::lock_guard<std::recursive_mutex> guard(m_mutex);
677 
678   if (AppendSymbolIndexesWithName(symbol_name, indexes) > 0) {
679     std::vector<uint32_t>::iterator pos = indexes.begin();
680     while (pos != indexes.end()) {
681       if (symbol_type == eSymbolTypeAny ||
682           m_symbols[*pos].GetType() == symbol_type)
683         ++pos;
684       else
685         pos = indexes.erase(pos);
686     }
687   }
688   return indexes.size();
689 }
690 
691 uint32_t Symtab::AppendSymbolIndexesWithNameAndType(
692     ConstString symbol_name, SymbolType symbol_type,
693     Debug symbol_debug_type, Visibility symbol_visibility,
694     std::vector<uint32_t> &indexes) {
695   std::lock_guard<std::recursive_mutex> guard(m_mutex);
696 
697   if (AppendSymbolIndexesWithName(symbol_name, symbol_debug_type,
698                                   symbol_visibility, indexes) > 0) {
699     std::vector<uint32_t>::iterator pos = indexes.begin();
700     while (pos != indexes.end()) {
701       if (symbol_type == eSymbolTypeAny ||
702           m_symbols[*pos].GetType() == symbol_type)
703         ++pos;
704       else
705         pos = indexes.erase(pos);
706     }
707   }
708   return indexes.size();
709 }
710 
711 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
712     const RegularExpression &regexp, SymbolType symbol_type,
713     std::vector<uint32_t> &indexes) {
714   std::lock_guard<std::recursive_mutex> guard(m_mutex);
715 
716   uint32_t prev_size = indexes.size();
717   uint32_t sym_end = m_symbols.size();
718 
719   for (uint32_t i = 0; i < sym_end; i++) {
720     if (symbol_type == eSymbolTypeAny ||
721         m_symbols[i].GetType() == symbol_type) {
722       const char *name = m_symbols[i].GetName().AsCString();
723       if (name) {
724         if (regexp.Execute(name))
725           indexes.push_back(i);
726       }
727     }
728   }
729   return indexes.size() - prev_size;
730 }
731 
732 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
733     const RegularExpression &regexp, SymbolType symbol_type,
734     Debug symbol_debug_type, Visibility symbol_visibility,
735     std::vector<uint32_t> &indexes) {
736   std::lock_guard<std::recursive_mutex> guard(m_mutex);
737 
738   uint32_t prev_size = indexes.size();
739   uint32_t sym_end = m_symbols.size();
740 
741   for (uint32_t i = 0; i < sym_end; i++) {
742     if (symbol_type == eSymbolTypeAny ||
743         m_symbols[i].GetType() == symbol_type) {
744       if (!CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
745         continue;
746 
747       const char *name = m_symbols[i].GetName().AsCString();
748       if (name) {
749         if (regexp.Execute(name))
750           indexes.push_back(i);
751       }
752     }
753   }
754   return indexes.size() - prev_size;
755 }
756 
757 Symbol *Symtab::FindSymbolWithType(SymbolType symbol_type,
758                                    Debug symbol_debug_type,
759                                    Visibility symbol_visibility,
760                                    uint32_t &start_idx) {
761   std::lock_guard<std::recursive_mutex> guard(m_mutex);
762 
763   const size_t count = m_symbols.size();
764   for (size_t idx = start_idx; idx < count; ++idx) {
765     if (symbol_type == eSymbolTypeAny ||
766         m_symbols[idx].GetType() == symbol_type) {
767       if (CheckSymbolAtIndex(idx, symbol_debug_type, symbol_visibility)) {
768         start_idx = idx;
769         return &m_symbols[idx];
770       }
771     }
772   }
773   return nullptr;
774 }
775 
776 void
777 Symtab::FindAllSymbolsWithNameAndType(ConstString name,
778                                       SymbolType symbol_type,
779                                       std::vector<uint32_t> &symbol_indexes) {
780   std::lock_guard<std::recursive_mutex> guard(m_mutex);
781 
782   LLDB_SCOPED_TIMER();
783   // Initialize all of the lookup by name indexes before converting NAME to a
784   // uniqued string NAME_STR below.
785   if (!m_name_indexes_computed)
786     InitNameIndexes();
787 
788   if (name) {
789     // The string table did have a string that matched, but we need to check
790     // the symbols and match the symbol_type if any was given.
791     AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_indexes);
792   }
793 }
794 
795 void Symtab::FindAllSymbolsWithNameAndType(
796     ConstString name, SymbolType symbol_type, Debug symbol_debug_type,
797     Visibility symbol_visibility, std::vector<uint32_t> &symbol_indexes) {
798   std::lock_guard<std::recursive_mutex> guard(m_mutex);
799 
800   LLDB_SCOPED_TIMER();
801   // Initialize all of the lookup by name indexes before converting NAME to a
802   // uniqued string NAME_STR below.
803   if (!m_name_indexes_computed)
804     InitNameIndexes();
805 
806   if (name) {
807     // The string table did have a string that matched, but we need to check
808     // the symbols and match the symbol_type if any was given.
809     AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
810                                        symbol_visibility, symbol_indexes);
811   }
812 }
813 
814 void Symtab::FindAllSymbolsMatchingRexExAndType(
815     const RegularExpression &regex, SymbolType symbol_type,
816     Debug symbol_debug_type, Visibility symbol_visibility,
817     std::vector<uint32_t> &symbol_indexes) {
818   std::lock_guard<std::recursive_mutex> guard(m_mutex);
819 
820   AppendSymbolIndexesMatchingRegExAndType(regex, symbol_type, symbol_debug_type,
821                                           symbol_visibility, symbol_indexes);
822 }
823 
824 Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name,
825                                                SymbolType symbol_type,
826                                                Debug symbol_debug_type,
827                                                Visibility symbol_visibility) {
828   std::lock_guard<std::recursive_mutex> guard(m_mutex);
829   LLDB_SCOPED_TIMER();
830   if (!m_name_indexes_computed)
831     InitNameIndexes();
832 
833   if (name) {
834     std::vector<uint32_t> matching_indexes;
835     // The string table did have a string that matched, but we need to check
836     // the symbols and match the symbol_type if any was given.
837     if (AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
838                                            symbol_visibility,
839                                            matching_indexes)) {
840       std::vector<uint32_t>::const_iterator pos, end = matching_indexes.end();
841       for (pos = matching_indexes.begin(); pos != end; ++pos) {
842         Symbol *symbol = SymbolAtIndex(*pos);
843 
844         if (symbol->Compare(name, symbol_type))
845           return symbol;
846       }
847     }
848   }
849   return nullptr;
850 }
851 
852 typedef struct {
853   const Symtab *symtab;
854   const addr_t file_addr;
855   Symbol *match_symbol;
856   const uint32_t *match_index_ptr;
857   addr_t match_offset;
858 } SymbolSearchInfo;
859 
860 // Add all the section file start address & size to the RangeVector, recusively
861 // adding any children sections.
862 static void AddSectionsToRangeMap(SectionList *sectlist,
863                                   RangeVector<addr_t, addr_t> &section_ranges) {
864   const int num_sections = sectlist->GetNumSections(0);
865   for (int i = 0; i < num_sections; i++) {
866     SectionSP sect_sp = sectlist->GetSectionAtIndex(i);
867     if (sect_sp) {
868       SectionList &child_sectlist = sect_sp->GetChildren();
869 
870       // If this section has children, add the children to the RangeVector.
871       // Else add this section to the RangeVector.
872       if (child_sectlist.GetNumSections(0) > 0) {
873         AddSectionsToRangeMap(&child_sectlist, section_ranges);
874       } else {
875         size_t size = sect_sp->GetByteSize();
876         if (size > 0) {
877           addr_t base_addr = sect_sp->GetFileAddress();
878           RangeVector<addr_t, addr_t>::Entry entry;
879           entry.SetRangeBase(base_addr);
880           entry.SetByteSize(size);
881           section_ranges.Append(entry);
882         }
883       }
884     }
885   }
886 }
887 
888 void Symtab::InitAddressIndexes() {
889   // Protected function, no need to lock mutex...
890   if (!m_file_addr_to_index_computed && !m_symbols.empty()) {
891     m_file_addr_to_index_computed = true;
892 
893     FileRangeToIndexMap::Entry entry;
894     const_iterator begin = m_symbols.begin();
895     const_iterator end = m_symbols.end();
896     for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
897       if (pos->ValueIsAddress()) {
898         entry.SetRangeBase(pos->GetAddressRef().GetFileAddress());
899         entry.SetByteSize(pos->GetByteSize());
900         entry.data = std::distance(begin, pos);
901         m_file_addr_to_index.Append(entry);
902       }
903     }
904     const size_t num_entries = m_file_addr_to_index.GetSize();
905     if (num_entries > 0) {
906       m_file_addr_to_index.Sort();
907 
908       // Create a RangeVector with the start & size of all the sections for
909       // this objfile.  We'll need to check this for any FileRangeToIndexMap
910       // entries with an uninitialized size, which could potentially be a large
911       // number so reconstituting the weak pointer is busywork when it is
912       // invariant information.
913       SectionList *sectlist = m_objfile->GetSectionList();
914       RangeVector<addr_t, addr_t> section_ranges;
915       if (sectlist) {
916         AddSectionsToRangeMap(sectlist, section_ranges);
917         section_ranges.Sort();
918       }
919 
920       // Iterate through the FileRangeToIndexMap and fill in the size for any
921       // entries that didn't already have a size from the Symbol (e.g. if we
922       // have a plain linker symbol with an address only, instead of debug info
923       // where we get an address and a size and a type, etc.)
924       for (size_t i = 0; i < num_entries; i++) {
925         FileRangeToIndexMap::Entry *entry =
926             m_file_addr_to_index.GetMutableEntryAtIndex(i);
927         if (entry->GetByteSize() == 0) {
928           addr_t curr_base_addr = entry->GetRangeBase();
929           const RangeVector<addr_t, addr_t>::Entry *containing_section =
930               section_ranges.FindEntryThatContains(curr_base_addr);
931 
932           // Use the end of the section as the default max size of the symbol
933           addr_t sym_size = 0;
934           if (containing_section) {
935             sym_size =
936                 containing_section->GetByteSize() -
937                 (entry->GetRangeBase() - containing_section->GetRangeBase());
938           }
939 
940           for (size_t j = i; j < num_entries; j++) {
941             FileRangeToIndexMap::Entry *next_entry =
942                 m_file_addr_to_index.GetMutableEntryAtIndex(j);
943             addr_t next_base_addr = next_entry->GetRangeBase();
944             if (next_base_addr > curr_base_addr) {
945               addr_t size_to_next_symbol = next_base_addr - curr_base_addr;
946 
947               // Take the difference between this symbol and the next one as
948               // its size, if it is less than the size of the section.
949               if (sym_size == 0 || size_to_next_symbol < sym_size) {
950                 sym_size = size_to_next_symbol;
951               }
952               break;
953             }
954           }
955 
956           if (sym_size > 0) {
957             entry->SetByteSize(sym_size);
958             Symbol &symbol = m_symbols[entry->data];
959             symbol.SetByteSize(sym_size);
960             symbol.SetSizeIsSynthesized(true);
961           }
962         }
963       }
964 
965       // Sort again in case the range size changes the ordering
966       m_file_addr_to_index.Sort();
967     }
968   }
969 }
970 
971 void Symtab::CalculateSymbolSizes() {
972   std::lock_guard<std::recursive_mutex> guard(m_mutex);
973   // Size computation happens inside InitAddressIndexes.
974   InitAddressIndexes();
975 }
976 
977 Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) {
978   std::lock_guard<std::recursive_mutex> guard(m_mutex);
979   if (!m_file_addr_to_index_computed)
980     InitAddressIndexes();
981 
982   const FileRangeToIndexMap::Entry *entry =
983       m_file_addr_to_index.FindEntryStartsAt(file_addr);
984   if (entry) {
985     Symbol *symbol = SymbolAtIndex(entry->data);
986     if (symbol->GetFileAddress() == file_addr)
987       return symbol;
988   }
989   return nullptr;
990 }
991 
992 Symbol *Symtab::FindSymbolContainingFileAddress(addr_t file_addr) {
993   std::lock_guard<std::recursive_mutex> guard(m_mutex);
994 
995   if (!m_file_addr_to_index_computed)
996     InitAddressIndexes();
997 
998   const FileRangeToIndexMap::Entry *entry =
999       m_file_addr_to_index.FindEntryThatContains(file_addr);
1000   if (entry) {
1001     Symbol *symbol = SymbolAtIndex(entry->data);
1002     if (symbol->ContainsFileAddress(file_addr))
1003       return symbol;
1004   }
1005   return nullptr;
1006 }
1007 
1008 void Symtab::ForEachSymbolContainingFileAddress(
1009     addr_t file_addr, std::function<bool(Symbol *)> const &callback) {
1010   std::lock_guard<std::recursive_mutex> guard(m_mutex);
1011 
1012   if (!m_file_addr_to_index_computed)
1013     InitAddressIndexes();
1014 
1015   std::vector<uint32_t> all_addr_indexes;
1016 
1017   // Get all symbols with file_addr
1018   const size_t addr_match_count =
1019       m_file_addr_to_index.FindEntryIndexesThatContain(file_addr,
1020                                                        all_addr_indexes);
1021 
1022   for (size_t i = 0; i < addr_match_count; ++i) {
1023     Symbol *symbol = SymbolAtIndex(all_addr_indexes[i]);
1024     if (symbol->ContainsFileAddress(file_addr)) {
1025       if (!callback(symbol))
1026         break;
1027     }
1028   }
1029 }
1030 
1031 void Symtab::SymbolIndicesToSymbolContextList(
1032     std::vector<uint32_t> &symbol_indexes, SymbolContextList &sc_list) {
1033   // No need to protect this call using m_mutex all other method calls are
1034   // already thread safe.
1035 
1036   const bool merge_symbol_into_function = true;
1037   size_t num_indices = symbol_indexes.size();
1038   if (num_indices > 0) {
1039     SymbolContext sc;
1040     sc.module_sp = m_objfile->GetModule();
1041     for (size_t i = 0; i < num_indices; i++) {
1042       sc.symbol = SymbolAtIndex(symbol_indexes[i]);
1043       if (sc.symbol)
1044         sc_list.AppendIfUnique(sc, merge_symbol_into_function);
1045     }
1046   }
1047 }
1048 
1049 void Symtab::FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
1050                                  SymbolContextList &sc_list) {
1051   std::vector<uint32_t> symbol_indexes;
1052 
1053   // eFunctionNameTypeAuto should be pre-resolved by a call to
1054   // Module::LookupInfo::LookupInfo()
1055   assert((name_type_mask & eFunctionNameTypeAuto) == 0);
1056 
1057   if (name_type_mask & (eFunctionNameTypeBase | eFunctionNameTypeFull)) {
1058     std::vector<uint32_t> temp_symbol_indexes;
1059     FindAllSymbolsWithNameAndType(name, eSymbolTypeAny, temp_symbol_indexes);
1060 
1061     unsigned temp_symbol_indexes_size = temp_symbol_indexes.size();
1062     if (temp_symbol_indexes_size > 0) {
1063       std::lock_guard<std::recursive_mutex> guard(m_mutex);
1064       for (unsigned i = 0; i < temp_symbol_indexes_size; i++) {
1065         SymbolContext sym_ctx;
1066         sym_ctx.symbol = SymbolAtIndex(temp_symbol_indexes[i]);
1067         if (sym_ctx.symbol) {
1068           switch (sym_ctx.symbol->GetType()) {
1069           case eSymbolTypeCode:
1070           case eSymbolTypeResolver:
1071           case eSymbolTypeReExported:
1072             symbol_indexes.push_back(temp_symbol_indexes[i]);
1073             break;
1074           default:
1075             break;
1076           }
1077         }
1078       }
1079     }
1080   }
1081 
1082   if (!m_name_indexes_computed)
1083     InitNameIndexes();
1084 
1085   for (lldb::FunctionNameType type :
1086        {lldb::eFunctionNameTypeBase, lldb::eFunctionNameTypeMethod,
1087         lldb::eFunctionNameTypeSelector}) {
1088     if (name_type_mask & type) {
1089       auto map = GetNameToSymbolIndexMap(type);
1090 
1091       const UniqueCStringMap<uint32_t>::Entry *match;
1092       for (match = map.FindFirstValueForName(name); match != nullptr;
1093            match = map.FindNextValueForName(match)) {
1094         symbol_indexes.push_back(match->value);
1095       }
1096     }
1097   }
1098 
1099   if (!symbol_indexes.empty()) {
1100     llvm::sort(symbol_indexes.begin(), symbol_indexes.end());
1101     symbol_indexes.erase(
1102         std::unique(symbol_indexes.begin(), symbol_indexes.end()),
1103         symbol_indexes.end());
1104     SymbolIndicesToSymbolContextList(symbol_indexes, sc_list);
1105   }
1106 }
1107 
1108 const Symbol *Symtab::GetParent(Symbol *child_symbol) const {
1109   uint32_t child_idx = GetIndexForSymbol(child_symbol);
1110   if (child_idx != UINT32_MAX && child_idx > 0) {
1111     for (uint32_t idx = child_idx - 1; idx != UINT32_MAX; --idx) {
1112       const Symbol *symbol = SymbolAtIndex(idx);
1113       const uint32_t sibling_idx = symbol->GetSiblingIndex();
1114       if (sibling_idx != UINT32_MAX && sibling_idx > child_idx)
1115         return symbol;
1116     }
1117   }
1118   return nullptr;
1119 }
1120