1 //===-- ManualDWARFIndex.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Plugins/SymbolFile/DWARF/ManualDWARFIndex.h"
10 #include "Plugins/Language/ObjC/ObjCLanguage.h"
11 #include "Plugins/SymbolFile/DWARF/DWARFDebugInfo.h"
12 #include "Plugins/SymbolFile/DWARF/DWARFDeclContext.h"
13 #include "Plugins/SymbolFile/DWARF/LogChannelDWARF.h"
14 #include "Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.h"
15 #include "lldb/Core/Module.h"
16 #include "lldb/Host/TaskPool.h"
17 #include "lldb/Symbol/ObjectFile.h"
18 #include "lldb/Utility/Stream.h"
19 #include "lldb/Utility/Timer.h"
20 
21 using namespace lldb_private;
22 using namespace lldb;
23 
24 void ManualDWARFIndex::Index() {
25   if (!m_dwarf)
26     return;
27 
28   SymbolFileDWARF &main_dwarf = *m_dwarf;
29   m_dwarf = nullptr;
30 
31   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
32   Timer scoped_timer(func_cat, "%p", static_cast<void *>(&main_dwarf));
33 
34   DWARFDebugInfo &main_info = main_dwarf.DebugInfo();
35   SymbolFileDWARFDwo *dwp_dwarf = main_dwarf.GetDwpSymbolFile().get();
36   DWARFDebugInfo *dwp_info = dwp_dwarf ? &dwp_dwarf->DebugInfo() : nullptr;
37 
38   std::vector<DWARFUnit *> units_to_index;
39   units_to_index.reserve(main_info.GetNumUnits() +
40                          (dwp_info ? dwp_info->GetNumUnits() : 0));
41 
42   // Process all units in the main file, as well as any type units in the dwp
43   // file. Type units in dwo files are handled when we reach the dwo file in
44   // IndexUnit.
45   for (size_t U = 0; U < main_info.GetNumUnits(); ++U) {
46     DWARFUnit *unit = main_info.GetUnitAtIndex(U);
47     if (unit && m_units_to_avoid.count(unit->GetOffset()) == 0)
48       units_to_index.push_back(unit);
49   }
50   if (dwp_info && dwp_info->ContainsTypeUnits()) {
51     for (size_t U = 0; U < dwp_info->GetNumUnits(); ++U) {
52       if (auto *tu = llvm::dyn_cast<DWARFTypeUnit>(dwp_info->GetUnitAtIndex(U)))
53         units_to_index.push_back(tu);
54     }
55   }
56 
57   if (units_to_index.empty())
58     return;
59 
60   std::vector<IndexSet> sets(units_to_index.size());
61 
62   // Keep memory down by clearing DIEs for any units if indexing
63   // caused us to load the unit's DIEs.
64   std::vector<llvm::Optional<DWARFUnit::ScopedExtractDIEs>> clear_cu_dies(
65       units_to_index.size());
66   auto parser_fn = [&](size_t cu_idx) {
67     IndexUnit(*units_to_index[cu_idx], dwp_dwarf, sets[cu_idx]);
68   };
69 
70   auto extract_fn = [&units_to_index, &clear_cu_dies](size_t cu_idx) {
71     clear_cu_dies[cu_idx] = units_to_index[cu_idx]->ExtractDIEsScoped();
72   };
73 
74   // Create a task runner that extracts dies for each DWARF unit in a
75   // separate thread
76   // First figure out which units didn't have their DIEs already
77   // parsed and remember this.  If no DIEs were parsed prior to this index
78   // function call, we are going to want to clear the CU dies after we are
79   // done indexing to make sure we don't pull in all DWARF dies, but we need
80   // to wait until all units have been indexed in case a DIE in one
81   // unit refers to another and the indexes accesses those DIEs.
82   TaskMapOverInt(0, units_to_index.size(), extract_fn);
83 
84   // Now create a task runner that can index each DWARF unit in a
85   // separate thread so we can index quickly.
86 
87   TaskMapOverInt(0, units_to_index.size(), parser_fn);
88 
89   auto finalize_fn = [this, &sets](NameToDIE(IndexSet::*index)) {
90     NameToDIE &result = m_set.*index;
91     for (auto &set : sets)
92       result.Append(set.*index);
93     result.Finalize();
94   };
95 
96   TaskPool::RunTasks([&]() { finalize_fn(&IndexSet::function_basenames); },
97                      [&]() { finalize_fn(&IndexSet::function_fullnames); },
98                      [&]() { finalize_fn(&IndexSet::function_methods); },
99                      [&]() { finalize_fn(&IndexSet::function_selectors); },
100                      [&]() { finalize_fn(&IndexSet::objc_class_selectors); },
101                      [&]() { finalize_fn(&IndexSet::globals); },
102                      [&]() { finalize_fn(&IndexSet::types); },
103                      [&]() { finalize_fn(&IndexSet::namespaces); });
104 }
105 
106 void ManualDWARFIndex::IndexUnit(DWARFUnit &unit, SymbolFileDWARFDwo *dwp,
107                                  IndexSet &set) {
108   Log *log = LogChannelDWARF::GetLogIfAll(DWARF_LOG_LOOKUPS);
109 
110   if (log) {
111     m_module.LogMessage(
112         log, "ManualDWARFIndex::IndexUnit for unit at .debug_info[0x%8.8x]",
113         unit.GetOffset());
114   }
115 
116   const LanguageType cu_language = SymbolFileDWARF::GetLanguage(unit);
117 
118   IndexUnitImpl(unit, cu_language, set);
119 
120   if (SymbolFileDWARFDwo *dwo_symbol_file = unit.GetDwoSymbolFile()) {
121     // Type units in a dwp file are indexed separately, so we just need to
122     // process the split unit here. However, if the split unit is in a dwo file,
123     // then we need to process type units here.
124     if (dwo_symbol_file == dwp) {
125       IndexUnitImpl(unit.GetNonSkeletonUnit(), cu_language, set);
126     } else {
127       DWARFDebugInfo &dwo_info = dwo_symbol_file->DebugInfo();
128       for (size_t i = 0; i < dwo_info.GetNumUnits(); ++i)
129         IndexUnitImpl(*dwo_info.GetUnitAtIndex(i), cu_language, set);
130     }
131   }
132 }
133 
134 void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit,
135                                      const LanguageType cu_language,
136                                      IndexSet &set) {
137   for (const DWARFDebugInfoEntry &die : unit.dies()) {
138     const dw_tag_t tag = die.Tag();
139 
140     switch (tag) {
141     case DW_TAG_array_type:
142     case DW_TAG_base_type:
143     case DW_TAG_class_type:
144     case DW_TAG_constant:
145     case DW_TAG_enumeration_type:
146     case DW_TAG_inlined_subroutine:
147     case DW_TAG_namespace:
148     case DW_TAG_string_type:
149     case DW_TAG_structure_type:
150     case DW_TAG_subprogram:
151     case DW_TAG_subroutine_type:
152     case DW_TAG_typedef:
153     case DW_TAG_union_type:
154     case DW_TAG_unspecified_type:
155     case DW_TAG_variable:
156       break;
157 
158     default:
159       continue;
160     }
161 
162     DWARFAttributes attributes;
163     const char *name = nullptr;
164     const char *mangled_cstr = nullptr;
165     bool is_declaration = false;
166     // bool is_artificial = false;
167     bool has_address = false;
168     bool has_location_or_const_value = false;
169     bool is_global_or_static_variable = false;
170 
171     DWARFFormValue specification_die_form;
172     const size_t num_attributes = die.GetAttributes(&unit, attributes);
173     if (num_attributes > 0) {
174       for (uint32_t i = 0; i < num_attributes; ++i) {
175         dw_attr_t attr = attributes.AttributeAtIndex(i);
176         DWARFFormValue form_value;
177         switch (attr) {
178         case DW_AT_name:
179           if (attributes.ExtractFormValueAtIndex(i, form_value))
180             name = form_value.AsCString();
181           break;
182 
183         case DW_AT_declaration:
184           if (attributes.ExtractFormValueAtIndex(i, form_value))
185             is_declaration = form_value.Unsigned() != 0;
186           break;
187 
188         //                case DW_AT_artificial:
189         //                    if (attributes.ExtractFormValueAtIndex(i,
190         //                    form_value))
191         //                        is_artificial = form_value.Unsigned() != 0;
192         //                    break;
193 
194         case DW_AT_MIPS_linkage_name:
195         case DW_AT_linkage_name:
196           if (attributes.ExtractFormValueAtIndex(i, form_value))
197             mangled_cstr = form_value.AsCString();
198           break;
199 
200         case DW_AT_low_pc:
201         case DW_AT_high_pc:
202         case DW_AT_ranges:
203           has_address = true;
204           break;
205 
206         case DW_AT_entry_pc:
207           has_address = true;
208           break;
209 
210         case DW_AT_location:
211         case DW_AT_const_value:
212           has_location_or_const_value = true;
213           if (tag == DW_TAG_variable) {
214             const DWARFDebugInfoEntry *parent_die = die.GetParent();
215             while (parent_die != nullptr) {
216               switch (parent_die->Tag()) {
217               case DW_TAG_subprogram:
218               case DW_TAG_lexical_block:
219               case DW_TAG_inlined_subroutine:
220                 // Even if this is a function level static, we don't add it. We
221                 // could theoretically add these if we wanted to by
222                 // introspecting into the DW_AT_location and seeing if the
223                 // location describes a hard coded address, but we don't want
224                 // the performance penalty of that right now.
225                 is_global_or_static_variable = false;
226                 // if (attributes.ExtractFormValueAtIndex(dwarf, i,
227                 //                                        form_value)) {
228                 //   // If we have valid block data, then we have location
229                 //   // expression bytesthat are fixed (not a location list).
230                 //   const uint8_t *block_data = form_value.BlockData();
231                 //   if (block_data) {
232                 //     uint32_t block_length = form_value.Unsigned();
233                 //     if (block_length == 1 +
234                 //     attributes.UnitAtIndex(i)->GetAddressByteSize()) {
235                 //       if (block_data[0] == DW_OP_addr)
236                 //         add_die = true;
237                 //     }
238                 //   }
239                 // }
240                 parent_die = nullptr; // Terminate the while loop.
241                 break;
242 
243               case DW_TAG_compile_unit:
244               case DW_TAG_partial_unit:
245                 is_global_or_static_variable = true;
246                 parent_die = nullptr; // Terminate the while loop.
247                 break;
248 
249               default:
250                 parent_die =
251                     parent_die->GetParent(); // Keep going in the while loop.
252                 break;
253               }
254             }
255           }
256           break;
257 
258         case DW_AT_specification:
259           if (attributes.ExtractFormValueAtIndex(i, form_value))
260             specification_die_form = form_value;
261           break;
262         }
263       }
264     }
265 
266     DIERef ref = *DWARFDIE(&unit, &die).GetDIERef();
267     switch (tag) {
268     case DW_TAG_inlined_subroutine:
269     case DW_TAG_subprogram:
270       if (has_address) {
271         if (name) {
272           bool is_objc_method = false;
273           if (cu_language == eLanguageTypeObjC ||
274               cu_language == eLanguageTypeObjC_plus_plus) {
275             ObjCLanguage::MethodName objc_method(name, true);
276             if (objc_method.IsValid(true)) {
277               is_objc_method = true;
278               ConstString class_name_with_category(
279                   objc_method.GetClassNameWithCategory());
280               ConstString objc_selector_name(objc_method.GetSelector());
281               ConstString objc_fullname_no_category_name(
282                   objc_method.GetFullNameWithoutCategory(true));
283               ConstString class_name_no_category(objc_method.GetClassName());
284               set.function_fullnames.Insert(ConstString(name), ref);
285               if (class_name_with_category)
286                 set.objc_class_selectors.Insert(class_name_with_category, ref);
287               if (class_name_no_category &&
288                   class_name_no_category != class_name_with_category)
289                 set.objc_class_selectors.Insert(class_name_no_category, ref);
290               if (objc_selector_name)
291                 set.function_selectors.Insert(objc_selector_name, ref);
292               if (objc_fullname_no_category_name)
293                 set.function_fullnames.Insert(objc_fullname_no_category_name,
294                                               ref);
295             }
296           }
297           // If we have a mangled name, then the DW_AT_name attribute is
298           // usually the method name without the class or any parameters
299           bool is_method = DWARFDIE(&unit, &die).IsMethod();
300 
301           if (is_method)
302             set.function_methods.Insert(ConstString(name), ref);
303           else
304             set.function_basenames.Insert(ConstString(name), ref);
305 
306           if (!is_method && !mangled_cstr && !is_objc_method)
307             set.function_fullnames.Insert(ConstString(name), ref);
308         }
309         if (mangled_cstr) {
310           // Make sure our mangled name isn't the same string table entry as
311           // our name. If it starts with '_', then it is ok, else compare the
312           // string to make sure it isn't the same and we don't end up with
313           // duplicate entries
314           if (name && name != mangled_cstr &&
315               ((mangled_cstr[0] == '_') ||
316                (::strcmp(name, mangled_cstr) != 0))) {
317             set.function_fullnames.Insert(ConstString(mangled_cstr), ref);
318           }
319         }
320       }
321       break;
322 
323     case DW_TAG_array_type:
324     case DW_TAG_base_type:
325     case DW_TAG_class_type:
326     case DW_TAG_constant:
327     case DW_TAG_enumeration_type:
328     case DW_TAG_string_type:
329     case DW_TAG_structure_type:
330     case DW_TAG_subroutine_type:
331     case DW_TAG_typedef:
332     case DW_TAG_union_type:
333     case DW_TAG_unspecified_type:
334       if (name && !is_declaration)
335         set.types.Insert(ConstString(name), ref);
336       if (mangled_cstr && !is_declaration)
337         set.types.Insert(ConstString(mangled_cstr), ref);
338       break;
339 
340     case DW_TAG_namespace:
341       if (name)
342         set.namespaces.Insert(ConstString(name), ref);
343       break;
344 
345     case DW_TAG_variable:
346       if (name && has_location_or_const_value && is_global_or_static_variable) {
347         set.globals.Insert(ConstString(name), ref);
348         // Be sure to include variables by their mangled and demangled names if
349         // they have any since a variable can have a basename "i", a mangled
350         // named "_ZN12_GLOBAL__N_11iE" and a demangled mangled name
351         // "(anonymous namespace)::i"...
352 
353         // Make sure our mangled name isn't the same string table entry as our
354         // name. If it starts with '_', then it is ok, else compare the string
355         // to make sure it isn't the same and we don't end up with duplicate
356         // entries
357         if (mangled_cstr && name != mangled_cstr &&
358             ((mangled_cstr[0] == '_') || (::strcmp(name, mangled_cstr) != 0))) {
359           set.globals.Insert(ConstString(mangled_cstr), ref);
360         }
361       }
362       break;
363 
364     default:
365       continue;
366     }
367   }
368 }
369 
370 void ManualDWARFIndex::GetGlobalVariables(ConstString basename, DIEArray &offsets) {
371   Index();
372   m_set.globals.Find(basename, offsets);
373 }
374 
375 void ManualDWARFIndex::GetGlobalVariables(const RegularExpression &regex,
376                                           DIEArray &offsets) {
377   Index();
378   m_set.globals.Find(regex, offsets);
379 }
380 
381 void ManualDWARFIndex::GetGlobalVariables(const DWARFUnit &unit,
382                                           DIEArray &offsets) {
383   Index();
384   m_set.globals.FindAllEntriesForUnit(unit, offsets);
385 }
386 
387 void ManualDWARFIndex::GetObjCMethods(ConstString class_name,
388                                       DIEArray &offsets) {
389   Index();
390   m_set.objc_class_selectors.Find(class_name, offsets);
391 }
392 
393 void ManualDWARFIndex::GetCompleteObjCClass(ConstString class_name,
394                                             bool must_be_implementation,
395                                             DIEArray &offsets) {
396   Index();
397   m_set.types.Find(class_name, offsets);
398 }
399 
400 void ManualDWARFIndex::GetTypes(ConstString name, DIEArray &offsets) {
401   Index();
402   m_set.types.Find(name, offsets);
403 }
404 
405 void ManualDWARFIndex::GetTypes(const DWARFDeclContext &context,
406                                 DIEArray &offsets) {
407   Index();
408   m_set.types.Find(ConstString(context[0].name), offsets);
409 }
410 
411 void ManualDWARFIndex::GetNamespaces(ConstString name, DIEArray &offsets) {
412   Index();
413   m_set.namespaces.Find(name, offsets);
414 }
415 
416 void ManualDWARFIndex::GetFunctions(ConstString name, SymbolFileDWARF &dwarf,
417                                     const CompilerDeclContext &parent_decl_ctx,
418                                     uint32_t name_type_mask,
419                                     std::vector<DWARFDIE> &dies) {
420   Index();
421 
422   if (name_type_mask & eFunctionNameTypeFull) {
423     DIEArray offsets;
424     m_set.function_fullnames.Find(name, offsets);
425     for (const DIERef &die_ref: offsets) {
426       DWARFDIE die = dwarf.GetDIE(die_ref);
427       if (!die)
428         continue;
429       if (!SymbolFileDWARF::DIEInDeclContext(parent_decl_ctx, die))
430         continue;
431       dies.push_back(die);
432     }
433   }
434   if (name_type_mask & eFunctionNameTypeBase) {
435     DIEArray offsets;
436     m_set.function_basenames.Find(name, offsets);
437     for (const DIERef &die_ref: offsets) {
438       DWARFDIE die = dwarf.GetDIE(die_ref);
439       if (!die)
440         continue;
441       if (!SymbolFileDWARF::DIEInDeclContext(parent_decl_ctx, die))
442         continue;
443       dies.push_back(die);
444     }
445   }
446 
447   if (name_type_mask & eFunctionNameTypeMethod && !parent_decl_ctx.IsValid()) {
448     DIEArray offsets;
449     m_set.function_methods.Find(name, offsets);
450     for (const DIERef &die_ref: offsets) {
451       DWARFDIE die = dwarf.GetDIE(die_ref);
452       if (!die)
453         continue;
454       dies.push_back(die);
455     }
456   }
457 
458   if (name_type_mask & eFunctionNameTypeSelector &&
459       !parent_decl_ctx.IsValid()) {
460     DIEArray offsets;
461     m_set.function_selectors.Find(name, offsets);
462     for (const DIERef &die_ref: offsets) {
463       DWARFDIE die = dwarf.GetDIE(die_ref);
464       if (!die)
465         continue;
466       dies.push_back(die);
467     }
468   }
469 }
470 
471 void ManualDWARFIndex::GetFunctions(const RegularExpression &regex,
472                                     DIEArray &offsets) {
473   Index();
474 
475   m_set.function_basenames.Find(regex, offsets);
476   m_set.function_fullnames.Find(regex, offsets);
477 }
478 
479 void ManualDWARFIndex::Dump(Stream &s) {
480   s.Format("Manual DWARF index for ({0}) '{1:F}':",
481            m_module.GetArchitecture().GetArchitectureName(),
482            m_module.GetObjectFile()->GetFileSpec());
483   s.Printf("\nFunction basenames:\n");
484   m_set.function_basenames.Dump(&s);
485   s.Printf("\nFunction fullnames:\n");
486   m_set.function_fullnames.Dump(&s);
487   s.Printf("\nFunction methods:\n");
488   m_set.function_methods.Dump(&s);
489   s.Printf("\nFunction selectors:\n");
490   m_set.function_selectors.Dump(&s);
491   s.Printf("\nObjective-C class selectors:\n");
492   m_set.objc_class_selectors.Dump(&s);
493   s.Printf("\nGlobals and statics:\n");
494   m_set.globals.Dump(&s);
495   s.Printf("\nTypes:\n");
496   m_set.types.Dump(&s);
497   s.Printf("\nNamespaces:\n");
498   m_set.namespaces.Dump(&s);
499 }
500