1 //===-- Symtab.cpp ----------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include <map>
10 #include <set>
11 
12 #include "Plugins/Language/ObjC/ObjCLanguage.h"
13 
14 #include "lldb/Core/Module.h"
15 #include "lldb/Core/RichManglingContext.h"
16 #include "lldb/Core/STLUtils.h"
17 #include "lldb/Core/Section.h"
18 #include "lldb/Symbol/ObjectFile.h"
19 #include "lldb/Symbol/Symbol.h"
20 #include "lldb/Symbol/SymbolContext.h"
21 #include "lldb/Symbol/Symtab.h"
22 #include "lldb/Utility/RegularExpression.h"
23 #include "lldb/Utility/Stream.h"
24 #include "lldb/Utility/Timer.h"
25 
26 #include "llvm/ADT/StringRef.h"
27 
28 using namespace lldb;
29 using namespace lldb_private;
30 
31 Symtab::Symtab(ObjectFile *objfile)
32     : m_objfile(objfile), m_symbols(), m_file_addr_to_index(),
33       m_name_to_index(), m_mutex(), m_file_addr_to_index_computed(false),
34       m_name_indexes_computed(false) {}
35 
36 Symtab::~Symtab() {}
37 
38 void Symtab::Reserve(size_t count) {
39   // Clients should grab the mutex from this symbol table and lock it manually
40   // when calling this function to avoid performance issues.
41   m_symbols.reserve(count);
42 }
43 
44 Symbol *Symtab::Resize(size_t count) {
45   // Clients should grab the mutex from this symbol table and lock it manually
46   // when calling this function to avoid performance issues.
47   m_symbols.resize(count);
48   return m_symbols.empty() ? nullptr : &m_symbols[0];
49 }
50 
51 uint32_t Symtab::AddSymbol(const Symbol &symbol) {
52   // Clients should grab the mutex from this symbol table and lock it manually
53   // when calling this function to avoid performance issues.
54   uint32_t symbol_idx = m_symbols.size();
55   m_name_to_index.Clear();
56   m_file_addr_to_index.Clear();
57   m_symbols.push_back(symbol);
58   m_file_addr_to_index_computed = false;
59   m_name_indexes_computed = false;
60   return symbol_idx;
61 }
62 
63 size_t Symtab::GetNumSymbols() const {
64   std::lock_guard<std::recursive_mutex> guard(m_mutex);
65   return m_symbols.size();
66 }
67 
68 void Symtab::SectionFileAddressesChanged() {
69   m_name_to_index.Clear();
70   m_file_addr_to_index_computed = false;
71 }
72 
73 void Symtab::Dump(Stream *s, Target *target, SortOrder sort_order) {
74   std::lock_guard<std::recursive_mutex> guard(m_mutex);
75 
76   //    s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
77   s->Indent();
78   const FileSpec &file_spec = m_objfile->GetFileSpec();
79   const char *object_name = nullptr;
80   if (m_objfile->GetModule())
81     object_name = m_objfile->GetModule()->GetObjectName().GetCString();
82 
83   if (file_spec)
84     s->Printf("Symtab, file = %s%s%s%s, num_symbols = %" PRIu64,
85               file_spec.GetPath().c_str(), object_name ? "(" : "",
86               object_name ? object_name : "", object_name ? ")" : "",
87               (uint64_t)m_symbols.size());
88   else
89     s->Printf("Symtab, num_symbols = %" PRIu64 "", (uint64_t)m_symbols.size());
90 
91   if (!m_symbols.empty()) {
92     switch (sort_order) {
93     case eSortOrderNone: {
94       s->PutCString(":\n");
95       DumpSymbolHeader(s);
96       const_iterator begin = m_symbols.begin();
97       const_iterator end = m_symbols.end();
98       for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
99         s->Indent();
100         pos->Dump(s, target, std::distance(begin, pos));
101       }
102     } break;
103 
104     case eSortOrderByName: {
105       // Although we maintain a lookup by exact name map, the table isn't
106       // sorted by name. So we must make the ordered symbol list up ourselves.
107       s->PutCString(" (sorted by name):\n");
108       DumpSymbolHeader(s);
109       typedef std::multimap<const char *, const Symbol *,
110                             CStringCompareFunctionObject>
111           CStringToSymbol;
112       CStringToSymbol name_map;
113       for (const_iterator pos = m_symbols.begin(), end = m_symbols.end();
114            pos != end; ++pos) {
115         const char *name = pos->GetName().AsCString();
116         if (name && name[0])
117           name_map.insert(std::make_pair(name, &(*pos)));
118       }
119 
120       for (CStringToSymbol::const_iterator pos = name_map.begin(),
121                                            end = name_map.end();
122            pos != end; ++pos) {
123         s->Indent();
124         pos->second->Dump(s, target, pos->second - &m_symbols[0]);
125       }
126     } break;
127 
128     case eSortOrderByAddress:
129       s->PutCString(" (sorted by address):\n");
130       DumpSymbolHeader(s);
131       if (!m_file_addr_to_index_computed)
132         InitAddressIndexes();
133       const size_t num_entries = m_file_addr_to_index.GetSize();
134       for (size_t i = 0; i < num_entries; ++i) {
135         s->Indent();
136         const uint32_t symbol_idx = m_file_addr_to_index.GetEntryRef(i).data;
137         m_symbols[symbol_idx].Dump(s, target, symbol_idx);
138       }
139       break;
140     }
141   } else {
142     s->PutCString("\n");
143   }
144 }
145 
146 void Symtab::Dump(Stream *s, Target *target,
147                   std::vector<uint32_t> &indexes) const {
148   std::lock_guard<std::recursive_mutex> guard(m_mutex);
149 
150   const size_t num_symbols = GetNumSymbols();
151   // s->Printf("%.*p: ", (int)sizeof(void*) * 2, this);
152   s->Indent();
153   s->Printf("Symtab %" PRIu64 " symbol indexes (%" PRIu64 " symbols total):\n",
154             (uint64_t)indexes.size(), (uint64_t)m_symbols.size());
155   s->IndentMore();
156 
157   if (!indexes.empty()) {
158     std::vector<uint32_t>::const_iterator pos;
159     std::vector<uint32_t>::const_iterator end = indexes.end();
160     DumpSymbolHeader(s);
161     for (pos = indexes.begin(); pos != end; ++pos) {
162       size_t idx = *pos;
163       if (idx < num_symbols) {
164         s->Indent();
165         m_symbols[idx].Dump(s, target, idx);
166       }
167     }
168   }
169   s->IndentLess();
170 }
171 
172 void Symtab::DumpSymbolHeader(Stream *s) {
173   s->Indent("               Debug symbol\n");
174   s->Indent("               |Synthetic symbol\n");
175   s->Indent("               ||Externally Visible\n");
176   s->Indent("               |||\n");
177   s->Indent("Index   UserID DSX Type            File Address/Value Load "
178             "Address       Size               Flags      Name\n");
179   s->Indent("------- ------ --- --------------- ------------------ "
180             "------------------ ------------------ ---------- "
181             "----------------------------------\n");
182 }
183 
184 static int CompareSymbolID(const void *key, const void *p) {
185   const user_id_t match_uid = *(const user_id_t *)key;
186   const user_id_t symbol_uid = ((const Symbol *)p)->GetID();
187   if (match_uid < symbol_uid)
188     return -1;
189   if (match_uid > symbol_uid)
190     return 1;
191   return 0;
192 }
193 
194 Symbol *Symtab::FindSymbolByID(lldb::user_id_t symbol_uid) const {
195   std::lock_guard<std::recursive_mutex> guard(m_mutex);
196 
197   Symbol *symbol =
198       (Symbol *)::bsearch(&symbol_uid, &m_symbols[0], m_symbols.size(),
199                           sizeof(m_symbols[0]), CompareSymbolID);
200   return symbol;
201 }
202 
203 Symbol *Symtab::SymbolAtIndex(size_t idx) {
204   // Clients should grab the mutex from this symbol table and lock it manually
205   // when calling this function to avoid performance issues.
206   if (idx < m_symbols.size())
207     return &m_symbols[idx];
208   return nullptr;
209 }
210 
211 const Symbol *Symtab::SymbolAtIndex(size_t idx) const {
212   // Clients should grab the mutex from this symbol table and lock it manually
213   // when calling this function to avoid performance issues.
214   if (idx < m_symbols.size())
215     return &m_symbols[idx];
216   return nullptr;
217 }
218 
219 //----------------------------------------------------------------------
220 // InitNameIndexes
221 //----------------------------------------------------------------------
222 static bool lldb_skip_name(llvm::StringRef mangled,
223                            Mangled::ManglingScheme scheme) {
224   switch (scheme) {
225   case Mangled::eManglingSchemeItanium: {
226     if (mangled.size() < 3 || !mangled.startswith("_Z"))
227       return true;
228 
229     // Avoid the following types of symbols in the index.
230     switch (mangled[2]) {
231     case 'G': // guard variables
232     case 'T': // virtual tables, VTT structures, typeinfo structures + names
233     case 'Z': // named local entities (if we eventually handle
234               // eSymbolTypeData, we will want this back)
235       return true;
236 
237     default:
238       break;
239     }
240 
241     // Include this name in the index.
242     return false;
243   }
244 
245   // No filters for this scheme yet. Include all names in indexing.
246   case Mangled::eManglingSchemeMSVC:
247     return false;
248 
249   // Don't try and demangle things we can't categorize.
250   case Mangled::eManglingSchemeNone:
251     return true;
252   }
253   llvm_unreachable("unknown scheme!");
254 }
255 
256 void Symtab::InitNameIndexes() {
257   // Protected function, no need to lock mutex...
258   if (!m_name_indexes_computed) {
259     m_name_indexes_computed = true;
260     static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
261     Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
262     // Create the name index vector to be able to quickly search by name
263     const size_t num_symbols = m_symbols.size();
264 #if 1
265     m_name_to_index.Reserve(num_symbols);
266 #else
267     // TODO: benchmark this to see if we save any memory. Otherwise we
268     // will always keep the memory reserved in the vector unless we pull some
269     // STL swap magic and then recopy...
270     uint32_t actual_count = 0;
271     for (const_iterator pos = m_symbols.begin(), end = m_symbols.end();
272          pos != end; ++pos) {
273       const Mangled &mangled = pos->GetMangled();
274       if (mangled.GetMangledName())
275         ++actual_count;
276 
277       if (mangled.GetDemangledName())
278         ++actual_count;
279     }
280 
281     m_name_to_index.Reserve(actual_count);
282 #endif
283 
284     // The "const char *" in "class_contexts" and backlog::value_type::second
285     // must come from a ConstString::GetCString()
286     std::set<const char *> class_contexts;
287     std::vector<std::pair<NameToIndexMap::Entry, const char *>> backlog;
288     backlog.reserve(num_symbols / 2);
289 
290     // Instantiation of the demangler is expensive, so better use a single one
291     // for all entries during batch processing.
292     RichManglingContext rmc;
293     NameToIndexMap::Entry entry;
294 
295     for (entry.value = 0; entry.value < num_symbols; ++entry.value) {
296       Symbol *symbol = &m_symbols[entry.value];
297 
298       // Don't let trampolines get into the lookup by name map If we ever need
299       // the trampoline symbols to be searchable by name we can remove this and
300       // then possibly add a new bool to any of the Symtab functions that
301       // lookup symbols by name to indicate if they want trampolines.
302       if (symbol->IsTrampoline())
303         continue;
304 
305       // If the symbol's name string matched a Mangled::ManglingScheme, it is
306       // stored in the mangled field.
307       Mangled &mangled = symbol->GetMangled();
308       entry.cstring = mangled.GetMangledName();
309       if (entry.cstring) {
310         m_name_to_index.Append(entry);
311 
312         if (symbol->ContainsLinkerAnnotations()) {
313           // If the symbol has linker annotations, also add the version without
314           // the annotations.
315           entry.cstring = ConstString(m_objfile->StripLinkerSymbolAnnotations(
316                                         entry.cstring.GetStringRef()));
317           m_name_to_index.Append(entry);
318         }
319 
320         const SymbolType type = symbol->GetType();
321         if (type == eSymbolTypeCode || type == eSymbolTypeResolver) {
322           if (mangled.DemangleWithRichManglingInfo(rmc, lldb_skip_name))
323             RegisterMangledNameEntry(entry, class_contexts, backlog, rmc);
324         }
325       }
326 
327       // Symbol name strings that didn't match a Mangled::ManglingScheme, are
328       // stored in the demangled field.
329       entry.cstring = mangled.GetDemangledName(symbol->GetLanguage());
330       if (entry.cstring) {
331         m_name_to_index.Append(entry);
332 
333         if (symbol->ContainsLinkerAnnotations()) {
334           // If the symbol has linker annotations, also add the version without
335           // the annotations.
336           entry.cstring = ConstString(m_objfile->StripLinkerSymbolAnnotations(
337                                         entry.cstring.GetStringRef()));
338           m_name_to_index.Append(entry);
339         }
340       }
341 
342       // If the demangled name turns out to be an ObjC name, and is a category
343       // name, add the version without categories to the index too.
344       ObjCLanguage::MethodName objc_method(entry.cstring.GetStringRef(), true);
345       if (objc_method.IsValid(true)) {
346         entry.cstring = objc_method.GetSelector();
347         m_selector_to_index.Append(entry);
348 
349         ConstString objc_method_no_category(
350             objc_method.GetFullNameWithoutCategory(true));
351         if (objc_method_no_category) {
352           entry.cstring = objc_method_no_category;
353           m_name_to_index.Append(entry);
354         }
355       }
356     }
357 
358     for (const auto &record : backlog) {
359       RegisterBacklogEntry(record.first, record.second, class_contexts);
360     }
361 
362     m_name_to_index.Sort();
363     m_name_to_index.SizeToFit();
364     m_selector_to_index.Sort();
365     m_selector_to_index.SizeToFit();
366     m_basename_to_index.Sort();
367     m_basename_to_index.SizeToFit();
368     m_method_to_index.Sort();
369     m_method_to_index.SizeToFit();
370   }
371 }
372 
373 void Symtab::RegisterMangledNameEntry(
374     NameToIndexMap::Entry &entry, std::set<const char *> &class_contexts,
375     std::vector<std::pair<NameToIndexMap::Entry, const char *>> &backlog,
376     RichManglingContext &rmc) {
377   // Only register functions that have a base name.
378   rmc.ParseFunctionBaseName();
379   llvm::StringRef base_name = rmc.GetBufferRef();
380   if (base_name.empty())
381     return;
382 
383   // The base name will be our entry's name.
384   entry.cstring = ConstString(base_name);
385 
386   rmc.ParseFunctionDeclContextName();
387   llvm::StringRef decl_context = rmc.GetBufferRef();
388 
389   // Register functions with no context.
390   if (decl_context.empty()) {
391     // This has to be a basename
392     m_basename_to_index.Append(entry);
393     // If there is no context (no namespaces or class scopes that come before
394     // the function name) then this also could be a fullname.
395     m_name_to_index.Append(entry);
396     return;
397   }
398 
399   // Make sure we have a pool-string pointer and see if we already know the
400   // context name.
401   const char *decl_context_ccstr = ConstString(decl_context).GetCString();
402   auto it = class_contexts.find(decl_context_ccstr);
403 
404   // Register constructors and destructors. They are methods and create
405   // declaration contexts.
406   if (rmc.IsCtorOrDtor()) {
407     m_method_to_index.Append(entry);
408     if (it == class_contexts.end())
409       class_contexts.insert(it, decl_context_ccstr);
410     return;
411   }
412 
413   // Register regular methods with a known declaration context.
414   if (it != class_contexts.end()) {
415     m_method_to_index.Append(entry);
416     return;
417   }
418 
419   // Regular methods in unknown declaration contexts are put to the backlog. We
420   // will revisit them once we processed all remaining symbols.
421   backlog.push_back(std::make_pair(entry, decl_context_ccstr));
422 }
423 
424 void Symtab::RegisterBacklogEntry(
425     const NameToIndexMap::Entry &entry, const char *decl_context,
426     const std::set<const char *> &class_contexts) {
427   auto it = class_contexts.find(decl_context);
428   if (it != class_contexts.end()) {
429     m_method_to_index.Append(entry);
430   } else {
431     // If we got here, we have something that had a context (was inside
432     // a namespace or class) yet we don't know the entry
433     m_method_to_index.Append(entry);
434     m_basename_to_index.Append(entry);
435   }
436 }
437 
438 void Symtab::PreloadSymbols() {
439   std::lock_guard<std::recursive_mutex> guard(m_mutex);
440   InitNameIndexes();
441 }
442 
443 void Symtab::AppendSymbolNamesToMap(const IndexCollection &indexes,
444                                     bool add_demangled, bool add_mangled,
445                                     NameToIndexMap &name_to_index_map) const {
446   if (add_demangled || add_mangled) {
447     static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
448     Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
449     std::lock_guard<std::recursive_mutex> guard(m_mutex);
450 
451     // Create the name index vector to be able to quickly search by name
452     NameToIndexMap::Entry entry;
453     const size_t num_indexes = indexes.size();
454     for (size_t i = 0; i < num_indexes; ++i) {
455       entry.value = indexes[i];
456       assert(i < m_symbols.size());
457       const Symbol *symbol = &m_symbols[entry.value];
458 
459       const Mangled &mangled = symbol->GetMangled();
460       if (add_demangled) {
461         entry.cstring = mangled.GetDemangledName(symbol->GetLanguage());
462         if (entry.cstring)
463           name_to_index_map.Append(entry);
464       }
465 
466       if (add_mangled) {
467         entry.cstring = mangled.GetMangledName();
468         if (entry.cstring)
469           name_to_index_map.Append(entry);
470       }
471     }
472   }
473 }
474 
475 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
476                                              std::vector<uint32_t> &indexes,
477                                              uint32_t start_idx,
478                                              uint32_t end_index) const {
479   std::lock_guard<std::recursive_mutex> guard(m_mutex);
480 
481   uint32_t prev_size = indexes.size();
482 
483   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
484 
485   for (uint32_t i = start_idx; i < count; ++i) {
486     if (symbol_type == eSymbolTypeAny || m_symbols[i].GetType() == symbol_type)
487       indexes.push_back(i);
488   }
489 
490   return indexes.size() - prev_size;
491 }
492 
493 uint32_t Symtab::AppendSymbolIndexesWithTypeAndFlagsValue(
494     SymbolType symbol_type, uint32_t flags_value,
495     std::vector<uint32_t> &indexes, uint32_t start_idx,
496     uint32_t end_index) const {
497   std::lock_guard<std::recursive_mutex> guard(m_mutex);
498 
499   uint32_t prev_size = indexes.size();
500 
501   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
502 
503   for (uint32_t i = start_idx; i < count; ++i) {
504     if ((symbol_type == eSymbolTypeAny ||
505          m_symbols[i].GetType() == symbol_type) &&
506         m_symbols[i].GetFlags() == flags_value)
507       indexes.push_back(i);
508   }
509 
510   return indexes.size() - prev_size;
511 }
512 
513 uint32_t Symtab::AppendSymbolIndexesWithType(SymbolType symbol_type,
514                                              Debug symbol_debug_type,
515                                              Visibility symbol_visibility,
516                                              std::vector<uint32_t> &indexes,
517                                              uint32_t start_idx,
518                                              uint32_t end_index) const {
519   std::lock_guard<std::recursive_mutex> guard(m_mutex);
520 
521   uint32_t prev_size = indexes.size();
522 
523   const uint32_t count = std::min<uint32_t>(m_symbols.size(), end_index);
524 
525   for (uint32_t i = start_idx; i < count; ++i) {
526     if (symbol_type == eSymbolTypeAny ||
527         m_symbols[i].GetType() == symbol_type) {
528       if (CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
529         indexes.push_back(i);
530     }
531   }
532 
533   return indexes.size() - prev_size;
534 }
535 
536 uint32_t Symtab::GetIndexForSymbol(const Symbol *symbol) const {
537   if (!m_symbols.empty()) {
538     const Symbol *first_symbol = &m_symbols[0];
539     if (symbol >= first_symbol && symbol < first_symbol + m_symbols.size())
540       return symbol - first_symbol;
541   }
542   return UINT32_MAX;
543 }
544 
545 struct SymbolSortInfo {
546   const bool sort_by_load_addr;
547   const Symbol *symbols;
548 };
549 
550 namespace {
551 struct SymbolIndexComparator {
552   const std::vector<Symbol> &symbols;
553   std::vector<lldb::addr_t> &addr_cache;
554 
555   // Getting from the symbol to the Address to the File Address involves some
556   // work. Since there are potentially many symbols here, and we're using this
557   // for sorting so we're going to be computing the address many times, cache
558   // that in addr_cache. The array passed in has to be the same size as the
559   // symbols array passed into the member variable symbols, and should be
560   // initialized with LLDB_INVALID_ADDRESS.
561   // NOTE: You have to make addr_cache externally and pass it in because
562   // std::stable_sort
563   // makes copies of the comparator it is initially passed in, and you end up
564   // spending huge amounts of time copying this array...
565 
566   SymbolIndexComparator(const std::vector<Symbol> &s,
567                         std::vector<lldb::addr_t> &a)
568       : symbols(s), addr_cache(a) {
569     assert(symbols.size() == addr_cache.size());
570   }
571   bool operator()(uint32_t index_a, uint32_t index_b) {
572     addr_t value_a = addr_cache[index_a];
573     if (value_a == LLDB_INVALID_ADDRESS) {
574       value_a = symbols[index_a].GetAddressRef().GetFileAddress();
575       addr_cache[index_a] = value_a;
576     }
577 
578     addr_t value_b = addr_cache[index_b];
579     if (value_b == LLDB_INVALID_ADDRESS) {
580       value_b = symbols[index_b].GetAddressRef().GetFileAddress();
581       addr_cache[index_b] = value_b;
582     }
583 
584     if (value_a == value_b) {
585       // The if the values are equal, use the original symbol user ID
586       lldb::user_id_t uid_a = symbols[index_a].GetID();
587       lldb::user_id_t uid_b = symbols[index_b].GetID();
588       if (uid_a < uid_b)
589         return true;
590       if (uid_a > uid_b)
591         return false;
592       return false;
593     } else if (value_a < value_b)
594       return true;
595 
596     return false;
597   }
598 };
599 }
600 
601 void Symtab::SortSymbolIndexesByValue(std::vector<uint32_t> &indexes,
602                                       bool remove_duplicates) const {
603   std::lock_guard<std::recursive_mutex> guard(m_mutex);
604 
605   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
606   Timer scoped_timer(func_cat, LLVM_PRETTY_FUNCTION);
607   // No need to sort if we have zero or one items...
608   if (indexes.size() <= 1)
609     return;
610 
611   // Sort the indexes in place using std::stable_sort.
612   // NOTE: The use of std::stable_sort instead of llvm::sort here is strictly
613   // for performance, not correctness.  The indexes vector tends to be "close"
614   // to sorted, which the stable sort handles better.
615 
616   std::vector<lldb::addr_t> addr_cache(m_symbols.size(), LLDB_INVALID_ADDRESS);
617 
618   SymbolIndexComparator comparator(m_symbols, addr_cache);
619   std::stable_sort(indexes.begin(), indexes.end(), comparator);
620 
621   // Remove any duplicates if requested
622   if (remove_duplicates) {
623     auto last = std::unique(indexes.begin(), indexes.end());
624     indexes.erase(last, indexes.end());
625   }
626 }
627 
628 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
629                                              std::vector<uint32_t> &indexes) {
630   std::lock_guard<std::recursive_mutex> guard(m_mutex);
631 
632   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
633   Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
634   if (symbol_name) {
635     if (!m_name_indexes_computed)
636       InitNameIndexes();
637 
638     return m_name_to_index.GetValues(symbol_name, indexes);
639   }
640   return 0;
641 }
642 
643 uint32_t Symtab::AppendSymbolIndexesWithName(ConstString symbol_name,
644                                              Debug symbol_debug_type,
645                                              Visibility symbol_visibility,
646                                              std::vector<uint32_t> &indexes) {
647   std::lock_guard<std::recursive_mutex> guard(m_mutex);
648 
649   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
650   Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
651   if (symbol_name) {
652     const size_t old_size = indexes.size();
653     if (!m_name_indexes_computed)
654       InitNameIndexes();
655 
656     std::vector<uint32_t> all_name_indexes;
657     const size_t name_match_count =
658         m_name_to_index.GetValues(symbol_name, all_name_indexes);
659     for (size_t i = 0; i < name_match_count; ++i) {
660       if (CheckSymbolAtIndex(all_name_indexes[i], symbol_debug_type,
661                              symbol_visibility))
662         indexes.push_back(all_name_indexes[i]);
663     }
664     return indexes.size() - old_size;
665   }
666   return 0;
667 }
668 
669 uint32_t
670 Symtab::AppendSymbolIndexesWithNameAndType(ConstString symbol_name,
671                                            SymbolType symbol_type,
672                                            std::vector<uint32_t> &indexes) {
673   std::lock_guard<std::recursive_mutex> guard(m_mutex);
674 
675   if (AppendSymbolIndexesWithName(symbol_name, indexes) > 0) {
676     std::vector<uint32_t>::iterator pos = indexes.begin();
677     while (pos != indexes.end()) {
678       if (symbol_type == eSymbolTypeAny ||
679           m_symbols[*pos].GetType() == symbol_type)
680         ++pos;
681       else
682         pos = indexes.erase(pos);
683     }
684   }
685   return indexes.size();
686 }
687 
688 uint32_t Symtab::AppendSymbolIndexesWithNameAndType(
689     ConstString symbol_name, SymbolType symbol_type,
690     Debug symbol_debug_type, Visibility symbol_visibility,
691     std::vector<uint32_t> &indexes) {
692   std::lock_guard<std::recursive_mutex> guard(m_mutex);
693 
694   if (AppendSymbolIndexesWithName(symbol_name, symbol_debug_type,
695                                   symbol_visibility, indexes) > 0) {
696     std::vector<uint32_t>::iterator pos = indexes.begin();
697     while (pos != indexes.end()) {
698       if (symbol_type == eSymbolTypeAny ||
699           m_symbols[*pos].GetType() == symbol_type)
700         ++pos;
701       else
702         pos = indexes.erase(pos);
703     }
704   }
705   return indexes.size();
706 }
707 
708 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
709     const RegularExpression &regexp, SymbolType symbol_type,
710     std::vector<uint32_t> &indexes) {
711   std::lock_guard<std::recursive_mutex> guard(m_mutex);
712 
713   uint32_t prev_size = indexes.size();
714   uint32_t sym_end = m_symbols.size();
715 
716   for (uint32_t i = 0; i < sym_end; i++) {
717     if (symbol_type == eSymbolTypeAny ||
718         m_symbols[i].GetType() == symbol_type) {
719       const char *name = m_symbols[i].GetName().AsCString();
720       if (name) {
721         if (regexp.Execute(name))
722           indexes.push_back(i);
723       }
724     }
725   }
726   return indexes.size() - prev_size;
727 }
728 
729 uint32_t Symtab::AppendSymbolIndexesMatchingRegExAndType(
730     const RegularExpression &regexp, SymbolType symbol_type,
731     Debug symbol_debug_type, Visibility symbol_visibility,
732     std::vector<uint32_t> &indexes) {
733   std::lock_guard<std::recursive_mutex> guard(m_mutex);
734 
735   uint32_t prev_size = indexes.size();
736   uint32_t sym_end = m_symbols.size();
737 
738   for (uint32_t i = 0; i < sym_end; i++) {
739     if (symbol_type == eSymbolTypeAny ||
740         m_symbols[i].GetType() == symbol_type) {
741       if (!CheckSymbolAtIndex(i, symbol_debug_type, symbol_visibility))
742         continue;
743 
744       const char *name = m_symbols[i].GetName().AsCString();
745       if (name) {
746         if (regexp.Execute(name))
747           indexes.push_back(i);
748       }
749     }
750   }
751   return indexes.size() - prev_size;
752 }
753 
754 Symbol *Symtab::FindSymbolWithType(SymbolType symbol_type,
755                                    Debug symbol_debug_type,
756                                    Visibility symbol_visibility,
757                                    uint32_t &start_idx) {
758   std::lock_guard<std::recursive_mutex> guard(m_mutex);
759 
760   const size_t count = m_symbols.size();
761   for (size_t idx = start_idx; idx < count; ++idx) {
762     if (symbol_type == eSymbolTypeAny ||
763         m_symbols[idx].GetType() == symbol_type) {
764       if (CheckSymbolAtIndex(idx, symbol_debug_type, symbol_visibility)) {
765         start_idx = idx;
766         return &m_symbols[idx];
767       }
768     }
769   }
770   return nullptr;
771 }
772 
773 size_t
774 Symtab::FindAllSymbolsWithNameAndType(ConstString name,
775                                       SymbolType symbol_type,
776                                       std::vector<uint32_t> &symbol_indexes) {
777   std::lock_guard<std::recursive_mutex> guard(m_mutex);
778 
779   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
780   Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
781   // Initialize all of the lookup by name indexes before converting NAME to a
782   // uniqued string NAME_STR below.
783   if (!m_name_indexes_computed)
784     InitNameIndexes();
785 
786   if (name) {
787     // The string table did have a string that matched, but we need to check
788     // the symbols and match the symbol_type if any was given.
789     AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_indexes);
790   }
791   return symbol_indexes.size();
792 }
793 
794 size_t Symtab::FindAllSymbolsWithNameAndType(
795     ConstString name, SymbolType symbol_type, Debug symbol_debug_type,
796     Visibility symbol_visibility, std::vector<uint32_t> &symbol_indexes) {
797   std::lock_guard<std::recursive_mutex> guard(m_mutex);
798 
799   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
800   Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
801   // Initialize all of the lookup by name indexes before converting NAME to a
802   // uniqued string NAME_STR below.
803   if (!m_name_indexes_computed)
804     InitNameIndexes();
805 
806   if (name) {
807     // The string table did have a string that matched, but we need to check
808     // the symbols and match the symbol_type if any was given.
809     AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
810                                        symbol_visibility, symbol_indexes);
811   }
812   return symbol_indexes.size();
813 }
814 
815 size_t Symtab::FindAllSymbolsMatchingRexExAndType(
816     const RegularExpression &regex, SymbolType symbol_type,
817     Debug symbol_debug_type, Visibility symbol_visibility,
818     std::vector<uint32_t> &symbol_indexes) {
819   std::lock_guard<std::recursive_mutex> guard(m_mutex);
820 
821   AppendSymbolIndexesMatchingRegExAndType(regex, symbol_type, symbol_debug_type,
822                                           symbol_visibility, symbol_indexes);
823   return symbol_indexes.size();
824 }
825 
826 Symbol *Symtab::FindFirstSymbolWithNameAndType(ConstString name,
827                                                SymbolType symbol_type,
828                                                Debug symbol_debug_type,
829                                                Visibility symbol_visibility) {
830   std::lock_guard<std::recursive_mutex> guard(m_mutex);
831 
832   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
833   Timer scoped_timer(func_cat, "%s", LLVM_PRETTY_FUNCTION);
834   if (!m_name_indexes_computed)
835     InitNameIndexes();
836 
837   if (name) {
838     std::vector<uint32_t> matching_indexes;
839     // The string table did have a string that matched, but we need to check
840     // the symbols and match the symbol_type if any was given.
841     if (AppendSymbolIndexesWithNameAndType(name, symbol_type, symbol_debug_type,
842                                            symbol_visibility,
843                                            matching_indexes)) {
844       std::vector<uint32_t>::const_iterator pos, end = matching_indexes.end();
845       for (pos = matching_indexes.begin(); pos != end; ++pos) {
846         Symbol *symbol = SymbolAtIndex(*pos);
847 
848         if (symbol->Compare(name, symbol_type))
849           return symbol;
850       }
851     }
852   }
853   return nullptr;
854 }
855 
856 typedef struct {
857   const Symtab *symtab;
858   const addr_t file_addr;
859   Symbol *match_symbol;
860   const uint32_t *match_index_ptr;
861   addr_t match_offset;
862 } SymbolSearchInfo;
863 
864 // Add all the section file start address & size to the RangeVector, recusively
865 // adding any children sections.
866 static void AddSectionsToRangeMap(SectionList *sectlist,
867                                   RangeVector<addr_t, addr_t> &section_ranges) {
868   const int num_sections = sectlist->GetNumSections(0);
869   for (int i = 0; i < num_sections; i++) {
870     SectionSP sect_sp = sectlist->GetSectionAtIndex(i);
871     if (sect_sp) {
872       SectionList &child_sectlist = sect_sp->GetChildren();
873 
874       // If this section has children, add the children to the RangeVector.
875       // Else add this section to the RangeVector.
876       if (child_sectlist.GetNumSections(0) > 0) {
877         AddSectionsToRangeMap(&child_sectlist, section_ranges);
878       } else {
879         size_t size = sect_sp->GetByteSize();
880         if (size > 0) {
881           addr_t base_addr = sect_sp->GetFileAddress();
882           RangeVector<addr_t, addr_t>::Entry entry;
883           entry.SetRangeBase(base_addr);
884           entry.SetByteSize(size);
885           section_ranges.Append(entry);
886         }
887       }
888     }
889   }
890 }
891 
892 void Symtab::InitAddressIndexes() {
893   // Protected function, no need to lock mutex...
894   if (!m_file_addr_to_index_computed && !m_symbols.empty()) {
895     m_file_addr_to_index_computed = true;
896 
897     FileRangeToIndexMap::Entry entry;
898     const_iterator begin = m_symbols.begin();
899     const_iterator end = m_symbols.end();
900     for (const_iterator pos = m_symbols.begin(); pos != end; ++pos) {
901       if (pos->ValueIsAddress()) {
902         entry.SetRangeBase(pos->GetAddressRef().GetFileAddress());
903         entry.SetByteSize(pos->GetByteSize());
904         entry.data = std::distance(begin, pos);
905         m_file_addr_to_index.Append(entry);
906       }
907     }
908     const size_t num_entries = m_file_addr_to_index.GetSize();
909     if (num_entries > 0) {
910       m_file_addr_to_index.Sort();
911 
912       // Create a RangeVector with the start & size of all the sections for
913       // this objfile.  We'll need to check this for any FileRangeToIndexMap
914       // entries with an uninitialized size, which could potentially be a large
915       // number so reconstituting the weak pointer is busywork when it is
916       // invariant information.
917       SectionList *sectlist = m_objfile->GetSectionList();
918       RangeVector<addr_t, addr_t> section_ranges;
919       if (sectlist) {
920         AddSectionsToRangeMap(sectlist, section_ranges);
921         section_ranges.Sort();
922       }
923 
924       // Iterate through the FileRangeToIndexMap and fill in the size for any
925       // entries that didn't already have a size from the Symbol (e.g. if we
926       // have a plain linker symbol with an address only, instead of debug info
927       // where we get an address and a size and a type, etc.)
928       for (size_t i = 0; i < num_entries; i++) {
929         FileRangeToIndexMap::Entry *entry =
930             m_file_addr_to_index.GetMutableEntryAtIndex(i);
931         if (entry->GetByteSize() == 0) {
932           addr_t curr_base_addr = entry->GetRangeBase();
933           const RangeVector<addr_t, addr_t>::Entry *containing_section =
934               section_ranges.FindEntryThatContains(curr_base_addr);
935 
936           // Use the end of the section as the default max size of the symbol
937           addr_t sym_size = 0;
938           if (containing_section) {
939             sym_size =
940                 containing_section->GetByteSize() -
941                 (entry->GetRangeBase() - containing_section->GetRangeBase());
942           }
943 
944           for (size_t j = i; j < num_entries; j++) {
945             FileRangeToIndexMap::Entry *next_entry =
946                 m_file_addr_to_index.GetMutableEntryAtIndex(j);
947             addr_t next_base_addr = next_entry->GetRangeBase();
948             if (next_base_addr > curr_base_addr) {
949               addr_t size_to_next_symbol = next_base_addr - curr_base_addr;
950 
951               // Take the difference between this symbol and the next one as
952               // its size, if it is less than the size of the section.
953               if (sym_size == 0 || size_to_next_symbol < sym_size) {
954                 sym_size = size_to_next_symbol;
955               }
956               break;
957             }
958           }
959 
960           if (sym_size > 0) {
961             entry->SetByteSize(sym_size);
962             Symbol &symbol = m_symbols[entry->data];
963             symbol.SetByteSize(sym_size);
964             symbol.SetSizeIsSynthesized(true);
965           }
966         }
967       }
968 
969       // Sort again in case the range size changes the ordering
970       m_file_addr_to_index.Sort();
971     }
972   }
973 }
974 
975 void Symtab::CalculateSymbolSizes() {
976   std::lock_guard<std::recursive_mutex> guard(m_mutex);
977   // Size computation happens inside InitAddressIndexes.
978   InitAddressIndexes();
979 }
980 
981 Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) {
982   std::lock_guard<std::recursive_mutex> guard(m_mutex);
983   if (!m_file_addr_to_index_computed)
984     InitAddressIndexes();
985 
986   const FileRangeToIndexMap::Entry *entry =
987       m_file_addr_to_index.FindEntryStartsAt(file_addr);
988   if (entry) {
989     Symbol *symbol = SymbolAtIndex(entry->data);
990     if (symbol->GetFileAddress() == file_addr)
991       return symbol;
992   }
993   return nullptr;
994 }
995 
996 Symbol *Symtab::FindSymbolContainingFileAddress(addr_t file_addr) {
997   std::lock_guard<std::recursive_mutex> guard(m_mutex);
998 
999   if (!m_file_addr_to_index_computed)
1000     InitAddressIndexes();
1001 
1002   const FileRangeToIndexMap::Entry *entry =
1003       m_file_addr_to_index.FindEntryThatContains(file_addr);
1004   if (entry) {
1005     Symbol *symbol = SymbolAtIndex(entry->data);
1006     if (symbol->ContainsFileAddress(file_addr))
1007       return symbol;
1008   }
1009   return nullptr;
1010 }
1011 
1012 void Symtab::ForEachSymbolContainingFileAddress(
1013     addr_t file_addr, std::function<bool(Symbol *)> const &callback) {
1014   std::lock_guard<std::recursive_mutex> guard(m_mutex);
1015 
1016   if (!m_file_addr_to_index_computed)
1017     InitAddressIndexes();
1018 
1019   std::vector<uint32_t> all_addr_indexes;
1020 
1021   // Get all symbols with file_addr
1022   const size_t addr_match_count =
1023       m_file_addr_to_index.FindEntryIndexesThatContain(file_addr,
1024                                                        all_addr_indexes);
1025 
1026   for (size_t i = 0; i < addr_match_count; ++i) {
1027     Symbol *symbol = SymbolAtIndex(all_addr_indexes[i]);
1028     if (symbol->ContainsFileAddress(file_addr)) {
1029       if (!callback(symbol))
1030         break;
1031     }
1032   }
1033 }
1034 
1035 void Symtab::SymbolIndicesToSymbolContextList(
1036     std::vector<uint32_t> &symbol_indexes, SymbolContextList &sc_list) {
1037   // No need to protect this call using m_mutex all other method calls are
1038   // already thread safe.
1039 
1040   const bool merge_symbol_into_function = true;
1041   size_t num_indices = symbol_indexes.size();
1042   if (num_indices > 0) {
1043     SymbolContext sc;
1044     sc.module_sp = m_objfile->GetModule();
1045     for (size_t i = 0; i < num_indices; i++) {
1046       sc.symbol = SymbolAtIndex(symbol_indexes[i]);
1047       if (sc.symbol)
1048         sc_list.AppendIfUnique(sc, merge_symbol_into_function);
1049     }
1050   }
1051 }
1052 
1053 size_t Symtab::FindFunctionSymbols(ConstString name,
1054                                    uint32_t name_type_mask,
1055                                    SymbolContextList &sc_list) {
1056   size_t count = 0;
1057   std::vector<uint32_t> symbol_indexes;
1058 
1059   // eFunctionNameTypeAuto should be pre-resolved by a call to
1060   // Module::LookupInfo::LookupInfo()
1061   assert((name_type_mask & eFunctionNameTypeAuto) == 0);
1062 
1063   if (name_type_mask & (eFunctionNameTypeBase | eFunctionNameTypeFull)) {
1064     std::vector<uint32_t> temp_symbol_indexes;
1065     FindAllSymbolsWithNameAndType(name, eSymbolTypeAny, temp_symbol_indexes);
1066 
1067     unsigned temp_symbol_indexes_size = temp_symbol_indexes.size();
1068     if (temp_symbol_indexes_size > 0) {
1069       std::lock_guard<std::recursive_mutex> guard(m_mutex);
1070       for (unsigned i = 0; i < temp_symbol_indexes_size; i++) {
1071         SymbolContext sym_ctx;
1072         sym_ctx.symbol = SymbolAtIndex(temp_symbol_indexes[i]);
1073         if (sym_ctx.symbol) {
1074           switch (sym_ctx.symbol->GetType()) {
1075           case eSymbolTypeCode:
1076           case eSymbolTypeResolver:
1077           case eSymbolTypeReExported:
1078             symbol_indexes.push_back(temp_symbol_indexes[i]);
1079             break;
1080           default:
1081             break;
1082           }
1083         }
1084       }
1085     }
1086   }
1087 
1088   if (name_type_mask & eFunctionNameTypeBase) {
1089     // From mangled names we can't tell what is a basename and what is a method
1090     // name, so we just treat them the same
1091     if (!m_name_indexes_computed)
1092       InitNameIndexes();
1093 
1094     if (!m_basename_to_index.IsEmpty()) {
1095       const UniqueCStringMap<uint32_t>::Entry *match;
1096       for (match = m_basename_to_index.FindFirstValueForName(name);
1097            match != nullptr;
1098            match = m_basename_to_index.FindNextValueForName(match)) {
1099         symbol_indexes.push_back(match->value);
1100       }
1101     }
1102   }
1103 
1104   if (name_type_mask & eFunctionNameTypeMethod) {
1105     if (!m_name_indexes_computed)
1106       InitNameIndexes();
1107 
1108     if (!m_method_to_index.IsEmpty()) {
1109       const UniqueCStringMap<uint32_t>::Entry *match;
1110       for (match = m_method_to_index.FindFirstValueForName(name);
1111            match != nullptr;
1112            match = m_method_to_index.FindNextValueForName(match)) {
1113         symbol_indexes.push_back(match->value);
1114       }
1115     }
1116   }
1117 
1118   if (name_type_mask & eFunctionNameTypeSelector) {
1119     if (!m_name_indexes_computed)
1120       InitNameIndexes();
1121 
1122     if (!m_selector_to_index.IsEmpty()) {
1123       const UniqueCStringMap<uint32_t>::Entry *match;
1124       for (match = m_selector_to_index.FindFirstValueForName(name);
1125            match != nullptr;
1126            match = m_selector_to_index.FindNextValueForName(match)) {
1127         symbol_indexes.push_back(match->value);
1128       }
1129     }
1130   }
1131 
1132   if (!symbol_indexes.empty()) {
1133     llvm::sort(symbol_indexes.begin(), symbol_indexes.end());
1134     symbol_indexes.erase(
1135         std::unique(symbol_indexes.begin(), symbol_indexes.end()),
1136         symbol_indexes.end());
1137     count = symbol_indexes.size();
1138     SymbolIndicesToSymbolContextList(symbol_indexes, sc_list);
1139   }
1140 
1141   return count;
1142 }
1143 
1144 const Symbol *Symtab::GetParent(Symbol *child_symbol) const {
1145   uint32_t child_idx = GetIndexForSymbol(child_symbol);
1146   if (child_idx != UINT32_MAX && child_idx > 0) {
1147     for (uint32_t idx = child_idx - 1; idx != UINT32_MAX; --idx) {
1148       const Symbol *symbol = SymbolAtIndex(idx);
1149       const uint32_t sibling_idx = symbol->GetSiblingIndex();
1150       if (sibling_idx != UINT32_MAX && sibling_idx > child_idx)
1151         return symbol;
1152     }
1153   }
1154   return NULL;
1155 }
1156