1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HashedNameToDIE.h"
10 #include "llvm/ADT/StringRef.h"
11 
12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
13                                       DIEArray &die_offsets) {
14   const size_t count = die_info_array.size();
15   for (size_t i = 0; i < count; ++i)
16     die_offsets.emplace_back(die_info_array[i]);
17 }
18 
19 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
20                                       const dw_tag_t tag,
21                                       DIEArray &die_offsets) {
22   if (tag == 0) {
23     ExtractDIEArray(die_info_array, die_offsets);
24   } else {
25     const size_t count = die_info_array.size();
26     for (size_t i = 0; i < count; ++i) {
27       const dw_tag_t die_tag = die_info_array[i].tag;
28       bool tag_matches = die_tag == 0 || tag == die_tag;
29       if (!tag_matches) {
30         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
31           tag_matches =
32               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
33       }
34       if (tag_matches)
35         die_offsets.emplace_back(die_info_array[i]);
36     }
37   }
38 }
39 
40 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
41                                       const dw_tag_t tag,
42                                       const uint32_t qualified_name_hash,
43                                       DIEArray &die_offsets) {
44   if (tag == 0) {
45     ExtractDIEArray(die_info_array, die_offsets);
46   } else {
47     const size_t count = die_info_array.size();
48     for (size_t i = 0; i < count; ++i) {
49       if (qualified_name_hash != die_info_array[i].qualified_name_hash)
50         continue;
51       const dw_tag_t die_tag = die_info_array[i].tag;
52       bool tag_matches = die_tag == 0 || tag == die_tag;
53       if (!tag_matches) {
54         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
55           tag_matches =
56               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
57       }
58       if (tag_matches)
59         die_offsets.emplace_back(die_info_array[i]);
60     }
61   }
62 }
63 
64 void DWARFMappedHash::ExtractClassOrStructDIEArray(
65     const DIEInfoArray &die_info_array,
66     bool return_implementation_only_if_available, DIEArray &die_offsets) {
67   const size_t count = die_info_array.size();
68   for (size_t i = 0; i < count; ++i) {
69     const dw_tag_t die_tag = die_info_array[i].tag;
70     if (die_tag == 0 || die_tag == DW_TAG_class_type ||
71         die_tag == DW_TAG_structure_type) {
72       if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) {
73         if (return_implementation_only_if_available) {
74           // We found the one true definition for this class, so only return
75           // that
76           die_offsets.clear();
77           die_offsets.emplace_back(die_info_array[i]);
78           return;
79         } else {
80           // Put the one true definition as the first entry so it matches first
81           die_offsets.emplace(die_offsets.begin(), die_info_array[i]);
82         }
83       } else {
84         die_offsets.emplace_back(die_info_array[i]);
85       }
86     }
87   }
88 }
89 
90 void DWARFMappedHash::ExtractTypesFromDIEArray(
91     const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
92     uint32_t type_flag_value, DIEArray &die_offsets) {
93   const size_t count = die_info_array.size();
94   for (size_t i = 0; i < count; ++i) {
95     if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value)
96       die_offsets.emplace_back(die_info_array[i]);
97   }
98 }
99 
100 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) {
101   switch (atom) {
102   case eAtomTypeNULL:
103     return "NULL";
104   case eAtomTypeDIEOffset:
105     return "die-offset";
106   case eAtomTypeCUOffset:
107     return "cu-offset";
108   case eAtomTypeTag:
109     return "die-tag";
110   case eAtomTypeNameFlags:
111     return "name-flags";
112   case eAtomTypeTypeFlags:
113     return "type-flags";
114   case eAtomTypeQualNameHash:
115     return "qualified-name-hash";
116   }
117   return "<invalid>";
118 }
119 
120 DWARFMappedHash::DIEInfo::DIEInfo()
121     : tag(0), type_flags(0), qualified_name_hash(0) {}
122 
123 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t c, dw_offset_t o, dw_tag_t t,
124                                   uint32_t f, uint32_t h)
125     : die_ref(DIERef::Section::DebugInfo, c, o), tag(t), type_flags(f),
126       qualified_name_hash(h) {}
127 
128 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset)
129     : die_base_offset(_die_base_offset), atoms(), atom_mask(0),
130       min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) {
131   // Define an array of DIE offsets by first defining an array, and then define
132   // the atom type for the array, in this case we have an array of DIE offsets
133   AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
134 }
135 
136 void DWARFMappedHash::Prologue::ClearAtoms() {
137   hash_data_has_fixed_byte_size = true;
138   min_hash_data_byte_size = 0;
139   atom_mask = 0;
140   atoms.clear();
141 }
142 
143 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const {
144   return (atom_mask & (1u << atom_type)) != 0;
145 }
146 
147 void DWARFMappedHash::Prologue::Clear() {
148   die_base_offset = 0;
149   ClearAtoms();
150 }
151 
152 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) {
153   atoms.push_back({type, form});
154   atom_mask |= 1u << type;
155   switch (form) {
156   case DW_FORM_indirect:
157   case DW_FORM_exprloc:
158   case DW_FORM_flag_present:
159   case DW_FORM_ref_sig8:
160     llvm_unreachable("Unhandled atom form");
161 
162   case DW_FORM_addrx:
163   case DW_FORM_string:
164   case DW_FORM_block:
165   case DW_FORM_block1:
166   case DW_FORM_sdata:
167   case DW_FORM_udata:
168   case DW_FORM_ref_udata:
169   case DW_FORM_GNU_addr_index:
170   case DW_FORM_GNU_str_index:
171     hash_data_has_fixed_byte_size = false;
172     LLVM_FALLTHROUGH;
173   case DW_FORM_flag:
174   case DW_FORM_data1:
175   case DW_FORM_ref1:
176   case DW_FORM_sec_offset:
177     min_hash_data_byte_size += 1;
178     break;
179 
180   case DW_FORM_block2:
181     hash_data_has_fixed_byte_size = false;
182     LLVM_FALLTHROUGH;
183   case DW_FORM_data2:
184   case DW_FORM_ref2:
185     min_hash_data_byte_size += 2;
186     break;
187 
188   case DW_FORM_block4:
189     hash_data_has_fixed_byte_size = false;
190     LLVM_FALLTHROUGH;
191   case DW_FORM_data4:
192   case DW_FORM_ref4:
193   case DW_FORM_addr:
194   case DW_FORM_ref_addr:
195   case DW_FORM_strp:
196     min_hash_data_byte_size += 4;
197     break;
198 
199   case DW_FORM_data8:
200   case DW_FORM_ref8:
201     min_hash_data_byte_size += 8;
202     break;
203   }
204 }
205 
206 lldb::offset_t
207 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data,
208                                 lldb::offset_t offset) {
209   ClearAtoms();
210 
211   die_base_offset = data.GetU32(&offset);
212 
213   const uint32_t atom_count = data.GetU32(&offset);
214   if (atom_count == 0x00060003u) {
215     // Old format, deal with contents of old pre-release format
216     while (data.GetU32(&offset))
217       /* do nothing */;
218 
219     // Hardcode to the only known value for now.
220     AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
221   } else {
222     for (uint32_t i = 0; i < atom_count; ++i) {
223       AtomType type = (AtomType)data.GetU16(&offset);
224       dw_form_t form = (dw_form_t)data.GetU16(&offset);
225       AppendAtom(type, form);
226     }
227   }
228   return offset;
229 }
230 
231 size_t DWARFMappedHash::Prologue::GetByteSize() const {
232   // Add an extra count to the atoms size for the zero termination Atom that
233   // gets written to disk
234   return sizeof(die_base_offset) + sizeof(uint32_t) +
235          atoms.size() * sizeof(Atom);
236 }
237 
238 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const {
239   return min_hash_data_byte_size;
240 }
241 
242 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const {
243   return hash_data_has_fixed_byte_size;
244 }
245 
246 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
247   return header_data.GetByteSize();
248 }
249 
250 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data,
251                                              lldb::offset_t offset) {
252   offset = MappedHash::Header<Prologue>::Read(data, offset);
253   if (offset != UINT32_MAX) {
254     offset = header_data.Read(data, offset);
255   }
256   return offset;
257 }
258 
259 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
260                                    lldb::offset_t *offset_ptr,
261                                    DIEInfo &hash_data) const {
262   const size_t num_atoms = header_data.atoms.size();
263   if (num_atoms == 0)
264     return false;
265 
266   for (size_t i = 0; i < num_atoms; ++i) {
267     DWARFFormValue form_value(nullptr, header_data.atoms[i].form);
268 
269     if (!form_value.ExtractValue(data, offset_ptr))
270       return false;
271 
272     switch (header_data.atoms[i].type) {
273     case eAtomTypeDIEOffset: // DIE offset, check form for encoding
274       hash_data.die_ref.die_offset =
275           DWARFFormValue::IsDataForm(form_value.Form())
276               ? form_value.Unsigned()
277               : form_value.Reference(header_data.die_base_offset);
278       break;
279 
280     case eAtomTypeTag: // DW_TAG value for the DIE
281       hash_data.tag = (dw_tag_t)form_value.Unsigned();
282       break;
283 
284     case eAtomTypeTypeFlags: // Flags from enum TypeFlags
285       hash_data.type_flags = (uint32_t)form_value.Unsigned();
286       break;
287 
288     case eAtomTypeQualNameHash: // Flags from enum TypeFlags
289       hash_data.qualified_name_hash = form_value.Unsigned();
290       break;
291 
292     default:
293       // We can always skip atoms we don't know about
294       break;
295     }
296   }
297   return true;
298 }
299 
300 DWARFMappedHash::MemoryTable::MemoryTable(
301     lldb_private::DWARFDataExtractor &table_data,
302     const lldb_private::DWARFDataExtractor &string_table, const char *name)
303     : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data),
304       m_data(table_data), m_string_table(string_table), m_name(name) {}
305 
306 const char *
307 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const {
308   // The key in the DWARF table is the .debug_str offset for the string
309   return m_string_table.PeekCStr(key);
310 }
311 
312 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
313                                                 HashData &hash_data) const {
314   lldb::offset_t offset = hash_data_offset;
315   offset += 4; // Skip string table offset that contains offset of hash name in
316                // .debug_str
317   const uint32_t count = m_data.GetU32(&offset);
318   if (count > 0) {
319     hash_data.resize(count);
320     for (uint32_t i = 0; i < count; ++i) {
321       if (!m_header.Read(m_data, &offset, hash_data[i]))
322         return false;
323     }
324   } else
325     hash_data.clear();
326   return true;
327 }
328 
329 DWARFMappedHash::MemoryTable::Result
330 DWARFMappedHash::MemoryTable::GetHashDataForName(
331     llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
332     Pair &pair) const {
333   pair.key = m_data.GetU32(hash_data_offset_ptr);
334   pair.value.clear();
335 
336   // If the key is zero, this terminates our chain of HashData objects for this
337   // hash value.
338   if (pair.key == 0)
339     return eResultEndOfHashData;
340 
341   // There definitely should be a string for this string offset, if there
342   // isn't, there is something wrong, return and error
343   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
344   if (strp_cstr == nullptr) {
345     *hash_data_offset_ptr = UINT32_MAX;
346     return eResultError;
347   }
348 
349   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
350   const size_t min_total_hash_data_size =
351       count * m_header.header_data.GetMinimumHashDataByteSize();
352   if (count > 0 &&
353       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
354                                       min_total_hash_data_size)) {
355     // We have at least one HashData entry, and we have enough data to parse at
356     // least "count" HashData entries.
357 
358     // First make sure the entire C string matches...
359     const bool match = name == strp_cstr;
360 
361     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
362       // If the string doesn't match and we have fixed size data, we can just
363       // add the total byte size of all HashData objects to the hash data
364       // offset and be done...
365       *hash_data_offset_ptr += min_total_hash_data_size;
366     } else {
367       // If the string does match, or we don't have fixed size data then we
368       // need to read the hash data as a stream. If the string matches we also
369       // append all HashData objects to the value array.
370       for (uint32_t i = 0; i < count; ++i) {
371         DIEInfo die_info;
372         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
373           // Only happened if the HashData of the string matched...
374           if (match)
375             pair.value.push_back(die_info);
376         } else {
377           // Something went wrong while reading the data
378           *hash_data_offset_ptr = UINT32_MAX;
379           return eResultError;
380         }
381       }
382     }
383     // Return the correct response depending on if the string matched or not...
384     if (match)
385       return eResultKeyMatch; // The key (cstring) matches and we have lookup
386                               // results!
387     else
388       return eResultKeyMismatch; // The key doesn't match, this function will
389                                  // get called
390     // again for the next key/value or the key terminator which in our case is
391     // a zero .debug_str offset.
392   } else {
393     *hash_data_offset_ptr = UINT32_MAX;
394     return eResultError;
395   }
396 }
397 
398 DWARFMappedHash::MemoryTable::Result
399 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
400     const lldb_private::RegularExpression &regex,
401     lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
402   pair.key = m_data.GetU32(hash_data_offset_ptr);
403   // If the key is zero, this terminates our chain of HashData objects for this
404   // hash value.
405   if (pair.key == 0)
406     return eResultEndOfHashData;
407 
408   // There definitely should be a string for this string offset, if there
409   // isn't, there is something wrong, return and error
410   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
411   if (strp_cstr == nullptr)
412     return eResultError;
413 
414   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
415   const size_t min_total_hash_data_size =
416       count * m_header.header_data.GetMinimumHashDataByteSize();
417   if (count > 0 &&
418       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
419                                       min_total_hash_data_size)) {
420     const bool match = regex.Execute(llvm::StringRef(strp_cstr));
421 
422     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
423       // If the regex doesn't match and we have fixed size data, we can just
424       // add the total byte size of all HashData objects to the hash data
425       // offset and be done...
426       *hash_data_offset_ptr += min_total_hash_data_size;
427     } else {
428       // If the string does match, or we don't have fixed size data then we
429       // need to read the hash data as a stream. If the string matches we also
430       // append all HashData objects to the value array.
431       for (uint32_t i = 0; i < count; ++i) {
432         DIEInfo die_info;
433         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
434           // Only happened if the HashData of the string matched...
435           if (match)
436             pair.value.push_back(die_info);
437         } else {
438           // Something went wrong while reading the data
439           *hash_data_offset_ptr = UINT32_MAX;
440           return eResultError;
441         }
442       }
443     }
444     // Return the correct response depending on if the string matched or not...
445     if (match)
446       return eResultKeyMatch; // The key (cstring) matches and we have lookup
447                               // results!
448     else
449       return eResultKeyMismatch; // The key doesn't match, this function will
450                                  // get called
451     // again for the next key/value or the key terminator which in our case is
452     // a zero .debug_str offset.
453   } else {
454     *hash_data_offset_ptr = UINT32_MAX;
455     return eResultError;
456   }
457 }
458 
459 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex(
460     const lldb_private::RegularExpression &regex,
461     DIEInfoArray &die_info_array) const {
462   const uint32_t hash_count = m_header.hashes_count;
463   Pair pair;
464   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
465     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
466     while (hash_data_offset != UINT32_MAX) {
467       const lldb::offset_t prev_hash_data_offset = hash_data_offset;
468       Result hash_result =
469           AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
470       if (prev_hash_data_offset == hash_data_offset)
471         break;
472 
473       // Check the result of getting our hash data
474       switch (hash_result) {
475       case eResultKeyMatch:
476       case eResultKeyMismatch:
477         // Whether we matches or not, it doesn't matter, we keep looking.
478         break;
479 
480       case eResultEndOfHashData:
481       case eResultError:
482         hash_data_offset = UINT32_MAX;
483         break;
484       }
485     }
486   }
487   die_info_array.swap(pair.value);
488   return die_info_array.size();
489 }
490 
491 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
492     const uint32_t die_offset_start, const uint32_t die_offset_end,
493     DIEInfoArray &die_info_array) const {
494   const uint32_t hash_count = m_header.hashes_count;
495   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
496     bool done = false;
497     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
498     while (!done && hash_data_offset != UINT32_MAX) {
499       KeyType key = m_data.GetU32(&hash_data_offset);
500       // If the key is zero, this terminates our chain of HashData objects for
501       // this hash value.
502       if (key == 0)
503         break;
504 
505       const uint32_t count = m_data.GetU32(&hash_data_offset);
506       for (uint32_t i = 0; i < count; ++i) {
507         DIEInfo die_info;
508         if (m_header.Read(m_data, &hash_data_offset, die_info)) {
509           if (die_info.die_ref.die_offset == 0)
510             done = true;
511           if (die_offset_start <= die_info.die_ref.die_offset &&
512               die_info.die_ref.die_offset < die_offset_end)
513             die_info_array.push_back(die_info);
514         }
515       }
516     }
517   }
518   return die_info_array.size();
519 }
520 
521 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
522                                                 DIEArray &die_offsets) {
523   if (name.empty())
524     return 0;
525 
526   DIEInfoArray die_info_array;
527   if (FindByName(name, die_info_array))
528     DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets);
529   return die_info_array.size();
530 }
531 
532 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name,
533                                                       const dw_tag_t tag,
534                                                       DIEArray &die_offsets) {
535   DIEInfoArray die_info_array;
536   if (FindByName(name, die_info_array))
537     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets);
538   return die_info_array.size();
539 }
540 
541 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
542     llvm::StringRef name, const dw_tag_t tag,
543     const uint32_t qualified_name_hash, DIEArray &die_offsets) {
544   DIEInfoArray die_info_array;
545   if (FindByName(name, die_info_array))
546     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
547                                      die_offsets);
548   return die_info_array.size();
549 }
550 
551 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
552     llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) {
553   DIEInfoArray die_info_array;
554   if (FindByName(name, die_info_array)) {
555     if (must_be_implementation &&
556         GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
557       // If we have two atoms, then we have the DIE offset and the type flags
558       // so we can find the objective C class efficiently.
559       DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX,
560                                                 eTypeFlagClassIsImplementation,
561                                                 die_offsets);
562     } else {
563       // We don't only want the one true definition, so try and see what we can
564       // find, and only return class or struct DIEs. If we do have the full
565       // implementation, then return it alone, else return all possible
566       // matches.
567       const bool return_implementation_only_if_available = true;
568       DWARFMappedHash::ExtractClassOrStructDIEArray(
569           die_info_array, return_implementation_only_if_available, die_offsets);
570     }
571   }
572   return die_offsets.size();
573 }
574 
575 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
576                                                 DIEInfoArray &die_info_array) {
577   if (name.empty())
578     return 0;
579 
580   Pair kv_pair;
581   size_t old_size = die_info_array.size();
582   if (Find(name, kv_pair)) {
583     die_info_array.swap(kv_pair.value);
584     return die_info_array.size() - old_size;
585   }
586   return 0;
587 }
588