1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HashedNameToDIE.h"
10 #include "llvm/ADT/StringRef.h"
11 
12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
13                                       DIEArray &die_offsets) {
14   const size_t count = die_info_array.size();
15   for (size_t i = 0; i < count; ++i)
16     die_offsets.emplace_back(die_info_array[i].cu_offset,
17                              die_info_array[i].offset);
18 }
19 
20 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
21                                       const dw_tag_t tag,
22                                       DIEArray &die_offsets) {
23   if (tag == 0) {
24     ExtractDIEArray(die_info_array, die_offsets);
25   } else {
26     const size_t count = die_info_array.size();
27     for (size_t i = 0; i < count; ++i) {
28       const dw_tag_t die_tag = die_info_array[i].tag;
29       bool tag_matches = die_tag == 0 || tag == die_tag;
30       if (!tag_matches) {
31         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
32           tag_matches =
33               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
34       }
35       if (tag_matches)
36         die_offsets.emplace_back(die_info_array[i].cu_offset,
37                                  die_info_array[i].offset);
38     }
39   }
40 }
41 
42 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
43                                       const dw_tag_t tag,
44                                       const uint32_t qualified_name_hash,
45                                       DIEArray &die_offsets) {
46   if (tag == 0) {
47     ExtractDIEArray(die_info_array, die_offsets);
48   } else {
49     const size_t count = die_info_array.size();
50     for (size_t i = 0; i < count; ++i) {
51       if (qualified_name_hash != die_info_array[i].qualified_name_hash)
52         continue;
53       const dw_tag_t die_tag = die_info_array[i].tag;
54       bool tag_matches = die_tag == 0 || tag == die_tag;
55       if (!tag_matches) {
56         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
57           tag_matches =
58               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
59       }
60       if (tag_matches)
61         die_offsets.emplace_back(die_info_array[i].cu_offset,
62                                  die_info_array[i].offset);
63     }
64   }
65 }
66 
67 void DWARFMappedHash::ExtractClassOrStructDIEArray(
68     const DIEInfoArray &die_info_array,
69     bool return_implementation_only_if_available, DIEArray &die_offsets) {
70   const size_t count = die_info_array.size();
71   for (size_t i = 0; i < count; ++i) {
72     const dw_tag_t die_tag = die_info_array[i].tag;
73     if (die_tag == 0 || die_tag == DW_TAG_class_type ||
74         die_tag == DW_TAG_structure_type) {
75       if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) {
76         if (return_implementation_only_if_available) {
77           // We found the one true definition for this class, so only return
78           // that
79           die_offsets.clear();
80           die_offsets.emplace_back(die_info_array[i].cu_offset,
81                                    die_info_array[i].offset);
82           return;
83         } else {
84           // Put the one true definition as the first entry so it matches first
85           die_offsets.emplace(die_offsets.begin(), die_info_array[i].cu_offset,
86                               die_info_array[i].offset);
87         }
88       } else {
89         die_offsets.emplace_back(die_info_array[i].cu_offset,
90                                  die_info_array[i].offset);
91       }
92     }
93   }
94 }
95 
96 void DWARFMappedHash::ExtractTypesFromDIEArray(
97     const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
98     uint32_t type_flag_value, DIEArray &die_offsets) {
99   const size_t count = die_info_array.size();
100   for (size_t i = 0; i < count; ++i) {
101     if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value)
102       die_offsets.emplace_back(die_info_array[i].cu_offset,
103                                die_info_array[i].offset);
104   }
105 }
106 
107 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) {
108   switch (atom) {
109   case eAtomTypeNULL:
110     return "NULL";
111   case eAtomTypeDIEOffset:
112     return "die-offset";
113   case eAtomTypeCUOffset:
114     return "cu-offset";
115   case eAtomTypeTag:
116     return "die-tag";
117   case eAtomTypeNameFlags:
118     return "name-flags";
119   case eAtomTypeTypeFlags:
120     return "type-flags";
121   case eAtomTypeQualNameHash:
122     return "qualified-name-hash";
123   }
124   return "<invalid>";
125 }
126 
127 DWARFMappedHash::DIEInfo::DIEInfo()
128     : cu_offset(DW_INVALID_OFFSET), offset(DW_INVALID_OFFSET), tag(0),
129       type_flags(0), qualified_name_hash(0) {}
130 
131 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t c, dw_offset_t o, dw_tag_t t,
132                                   uint32_t f, uint32_t h)
133     : cu_offset(c), offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
134 
135 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset)
136     : die_base_offset(_die_base_offset), atoms(), atom_mask(0),
137       min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) {
138   // Define an array of DIE offsets by first defining an array, and then define
139   // the atom type for the array, in this case we have an array of DIE offsets
140   AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
141 }
142 
143 void DWARFMappedHash::Prologue::ClearAtoms() {
144   hash_data_has_fixed_byte_size = true;
145   min_hash_data_byte_size = 0;
146   atom_mask = 0;
147   atoms.clear();
148 }
149 
150 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const {
151   return (atom_mask & (1u << atom_type)) != 0;
152 }
153 
154 void DWARFMappedHash::Prologue::Clear() {
155   die_base_offset = 0;
156   ClearAtoms();
157 }
158 
159 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) {
160   atoms.push_back({type, form});
161   atom_mask |= 1u << type;
162   switch (form) {
163   case DW_FORM_indirect:
164   case DW_FORM_exprloc:
165   case DW_FORM_flag_present:
166   case DW_FORM_ref_sig8:
167     llvm_unreachable("Unhandled atom form");
168 
169   case DW_FORM_addrx:
170   case DW_FORM_string:
171   case DW_FORM_block:
172   case DW_FORM_block1:
173   case DW_FORM_sdata:
174   case DW_FORM_udata:
175   case DW_FORM_ref_udata:
176   case DW_FORM_GNU_addr_index:
177   case DW_FORM_GNU_str_index:
178     hash_data_has_fixed_byte_size = false;
179     LLVM_FALLTHROUGH;
180   case DW_FORM_flag:
181   case DW_FORM_data1:
182   case DW_FORM_ref1:
183   case DW_FORM_sec_offset:
184     min_hash_data_byte_size += 1;
185     break;
186 
187   case DW_FORM_block2:
188     hash_data_has_fixed_byte_size = false;
189     LLVM_FALLTHROUGH;
190   case DW_FORM_data2:
191   case DW_FORM_ref2:
192     min_hash_data_byte_size += 2;
193     break;
194 
195   case DW_FORM_block4:
196     hash_data_has_fixed_byte_size = false;
197     LLVM_FALLTHROUGH;
198   case DW_FORM_data4:
199   case DW_FORM_ref4:
200   case DW_FORM_addr:
201   case DW_FORM_ref_addr:
202   case DW_FORM_strp:
203     min_hash_data_byte_size += 4;
204     break;
205 
206   case DW_FORM_data8:
207   case DW_FORM_ref8:
208     min_hash_data_byte_size += 8;
209     break;
210   }
211 }
212 
213 lldb::offset_t
214 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data,
215                                 lldb::offset_t offset) {
216   ClearAtoms();
217 
218   die_base_offset = data.GetU32(&offset);
219 
220   const uint32_t atom_count = data.GetU32(&offset);
221   if (atom_count == 0x00060003u) {
222     // Old format, deal with contents of old pre-release format
223     while (data.GetU32(&offset))
224       /* do nothing */;
225 
226     // Hardcode to the only known value for now.
227     AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
228   } else {
229     for (uint32_t i = 0; i < atom_count; ++i) {
230       AtomType type = (AtomType)data.GetU16(&offset);
231       dw_form_t form = (dw_form_t)data.GetU16(&offset);
232       AppendAtom(type, form);
233     }
234   }
235   return offset;
236 }
237 
238 size_t DWARFMappedHash::Prologue::GetByteSize() const {
239   // Add an extra count to the atoms size for the zero termination Atom that
240   // gets written to disk
241   return sizeof(die_base_offset) + sizeof(uint32_t) +
242          atoms.size() * sizeof(Atom);
243 }
244 
245 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const {
246   return min_hash_data_byte_size;
247 }
248 
249 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const {
250   return hash_data_has_fixed_byte_size;
251 }
252 
253 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
254   return header_data.GetByteSize();
255 }
256 
257 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data,
258                                              lldb::offset_t offset) {
259   offset = MappedHash::Header<Prologue>::Read(data, offset);
260   if (offset != UINT32_MAX) {
261     offset = header_data.Read(data, offset);
262   }
263   return offset;
264 }
265 
266 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
267                                    lldb::offset_t *offset_ptr,
268                                    DIEInfo &hash_data) const {
269   const size_t num_atoms = header_data.atoms.size();
270   if (num_atoms == 0)
271     return false;
272 
273   for (size_t i = 0; i < num_atoms; ++i) {
274     DWARFFormValue form_value(NULL, header_data.atoms[i].form);
275 
276     if (!form_value.ExtractValue(data, offset_ptr))
277       return false;
278 
279     switch (header_data.atoms[i].type) {
280     case eAtomTypeDIEOffset: // DIE offset, check form for encoding
281       hash_data.offset =
282           DWARFFormValue::IsDataForm(form_value.Form())
283               ? form_value.Unsigned()
284               : form_value.Reference(header_data.die_base_offset);
285       break;
286 
287     case eAtomTypeTag: // DW_TAG value for the DIE
288       hash_data.tag = (dw_tag_t)form_value.Unsigned();
289       break;
290 
291     case eAtomTypeTypeFlags: // Flags from enum TypeFlags
292       hash_data.type_flags = (uint32_t)form_value.Unsigned();
293       break;
294 
295     case eAtomTypeQualNameHash: // Flags from enum TypeFlags
296       hash_data.qualified_name_hash = form_value.Unsigned();
297       break;
298 
299     default:
300       // We can always skip atoms we don't know about
301       break;
302     }
303   }
304   return true;
305 }
306 
307 DWARFMappedHash::MemoryTable::MemoryTable(
308     lldb_private::DWARFDataExtractor &table_data,
309     const lldb_private::DWARFDataExtractor &string_table, const char *name)
310     : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data),
311       m_data(table_data), m_string_table(string_table), m_name(name) {}
312 
313 const char *
314 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const {
315   // The key in the DWARF table is the .debug_str offset for the string
316   return m_string_table.PeekCStr(key);
317 }
318 
319 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
320                                                 HashData &hash_data) const {
321   lldb::offset_t offset = hash_data_offset;
322   offset += 4; // Skip string table offset that contains offset of hash name in
323                // .debug_str
324   const uint32_t count = m_data.GetU32(&offset);
325   if (count > 0) {
326     hash_data.resize(count);
327     for (uint32_t i = 0; i < count; ++i) {
328       if (!m_header.Read(m_data, &offset, hash_data[i]))
329         return false;
330     }
331   } else
332     hash_data.clear();
333   return true;
334 }
335 
336 DWARFMappedHash::MemoryTable::Result
337 DWARFMappedHash::MemoryTable::GetHashDataForName(
338     llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
339     Pair &pair) const {
340   pair.key = m_data.GetU32(hash_data_offset_ptr);
341   pair.value.clear();
342 
343   // If the key is zero, this terminates our chain of HashData objects for this
344   // hash value.
345   if (pair.key == 0)
346     return eResultEndOfHashData;
347 
348   // There definitely should be a string for this string offset, if there
349   // isn't, there is something wrong, return and error
350   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
351   if (strp_cstr == NULL) {
352     *hash_data_offset_ptr = UINT32_MAX;
353     return eResultError;
354   }
355 
356   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
357   const size_t min_total_hash_data_size =
358       count * m_header.header_data.GetMinimumHashDataByteSize();
359   if (count > 0 &&
360       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
361                                       min_total_hash_data_size)) {
362     // We have at least one HashData entry, and we have enough data to parse at
363     // least "count" HashData entries.
364 
365     // First make sure the entire C string matches...
366     const bool match = name == strp_cstr;
367 
368     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
369       // If the string doesn't match and we have fixed size data, we can just
370       // add the total byte size of all HashData objects to the hash data
371       // offset and be done...
372       *hash_data_offset_ptr += min_total_hash_data_size;
373     } else {
374       // If the string does match, or we don't have fixed size data then we
375       // need to read the hash data as a stream. If the string matches we also
376       // append all HashData objects to the value array.
377       for (uint32_t i = 0; i < count; ++i) {
378         DIEInfo die_info;
379         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
380           // Only happened if the HashData of the string matched...
381           if (match)
382             pair.value.push_back(die_info);
383         } else {
384           // Something went wrong while reading the data
385           *hash_data_offset_ptr = UINT32_MAX;
386           return eResultError;
387         }
388       }
389     }
390     // Return the correct response depending on if the string matched or not...
391     if (match)
392       return eResultKeyMatch; // The key (cstring) matches and we have lookup
393                               // results!
394     else
395       return eResultKeyMismatch; // The key doesn't match, this function will
396                                  // get called
397     // again for the next key/value or the key terminator which in our case is
398     // a zero .debug_str offset.
399   } else {
400     *hash_data_offset_ptr = UINT32_MAX;
401     return eResultError;
402   }
403 }
404 
405 DWARFMappedHash::MemoryTable::Result
406 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
407     const lldb_private::RegularExpression &regex,
408     lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
409   pair.key = m_data.GetU32(hash_data_offset_ptr);
410   // If the key is zero, this terminates our chain of HashData objects for this
411   // hash value.
412   if (pair.key == 0)
413     return eResultEndOfHashData;
414 
415   // There definitely should be a string for this string offset, if there
416   // isn't, there is something wrong, return and error
417   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
418   if (strp_cstr == NULL)
419     return eResultError;
420 
421   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
422   const size_t min_total_hash_data_size =
423       count * m_header.header_data.GetMinimumHashDataByteSize();
424   if (count > 0 &&
425       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
426                                       min_total_hash_data_size)) {
427     const bool match = regex.Execute(llvm::StringRef(strp_cstr));
428 
429     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
430       // If the regex doesn't match and we have fixed size data, we can just
431       // add the total byte size of all HashData objects to the hash data
432       // offset and be done...
433       *hash_data_offset_ptr += min_total_hash_data_size;
434     } else {
435       // If the string does match, or we don't have fixed size data then we
436       // need to read the hash data as a stream. If the string matches we also
437       // append all HashData objects to the value array.
438       for (uint32_t i = 0; i < count; ++i) {
439         DIEInfo die_info;
440         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
441           // Only happened if the HashData of the string matched...
442           if (match)
443             pair.value.push_back(die_info);
444         } else {
445           // Something went wrong while reading the data
446           *hash_data_offset_ptr = UINT32_MAX;
447           return eResultError;
448         }
449       }
450     }
451     // Return the correct response depending on if the string matched or not...
452     if (match)
453       return eResultKeyMatch; // The key (cstring) matches and we have lookup
454                               // results!
455     else
456       return eResultKeyMismatch; // The key doesn't match, this function will
457                                  // get called
458     // again for the next key/value or the key terminator which in our case is
459     // a zero .debug_str offset.
460   } else {
461     *hash_data_offset_ptr = UINT32_MAX;
462     return eResultError;
463   }
464 }
465 
466 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex(
467     const lldb_private::RegularExpression &regex,
468     DIEInfoArray &die_info_array) const {
469   const uint32_t hash_count = m_header.hashes_count;
470   Pair pair;
471   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
472     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
473     while (hash_data_offset != UINT32_MAX) {
474       const lldb::offset_t prev_hash_data_offset = hash_data_offset;
475       Result hash_result =
476           AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
477       if (prev_hash_data_offset == hash_data_offset)
478         break;
479 
480       // Check the result of getting our hash data
481       switch (hash_result) {
482       case eResultKeyMatch:
483       case eResultKeyMismatch:
484         // Whether we matches or not, it doesn't matter, we keep looking.
485         break;
486 
487       case eResultEndOfHashData:
488       case eResultError:
489         hash_data_offset = UINT32_MAX;
490         break;
491       }
492     }
493   }
494   die_info_array.swap(pair.value);
495   return die_info_array.size();
496 }
497 
498 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
499     const uint32_t die_offset_start, const uint32_t die_offset_end,
500     DIEInfoArray &die_info_array) const {
501   const uint32_t hash_count = m_header.hashes_count;
502   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
503     bool done = false;
504     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
505     while (!done && hash_data_offset != UINT32_MAX) {
506       KeyType key = m_data.GetU32(&hash_data_offset);
507       // If the key is zero, this terminates our chain of HashData objects for
508       // this hash value.
509       if (key == 0)
510         break;
511 
512       const uint32_t count = m_data.GetU32(&hash_data_offset);
513       for (uint32_t i = 0; i < count; ++i) {
514         DIEInfo die_info;
515         if (m_header.Read(m_data, &hash_data_offset, die_info)) {
516           if (die_info.offset == 0)
517             done = true;
518           if (die_offset_start <= die_info.offset &&
519               die_info.offset < die_offset_end)
520             die_info_array.push_back(die_info);
521         }
522       }
523     }
524   }
525   return die_info_array.size();
526 }
527 
528 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
529                                                 DIEArray &die_offsets) {
530   if (name.empty())
531     return 0;
532 
533   DIEInfoArray die_info_array;
534   if (FindByName(name, die_info_array))
535     DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets);
536   return die_info_array.size();
537 }
538 
539 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name,
540                                                       const dw_tag_t tag,
541                                                       DIEArray &die_offsets) {
542   DIEInfoArray die_info_array;
543   if (FindByName(name, die_info_array))
544     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets);
545   return die_info_array.size();
546 }
547 
548 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
549     llvm::StringRef name, const dw_tag_t tag,
550     const uint32_t qualified_name_hash, DIEArray &die_offsets) {
551   DIEInfoArray die_info_array;
552   if (FindByName(name, die_info_array))
553     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
554                                      die_offsets);
555   return die_info_array.size();
556 }
557 
558 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
559     llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) {
560   DIEInfoArray die_info_array;
561   if (FindByName(name, die_info_array)) {
562     if (must_be_implementation &&
563         GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
564       // If we have two atoms, then we have the DIE offset and the type flags
565       // so we can find the objective C class efficiently.
566       DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX,
567                                                 eTypeFlagClassIsImplementation,
568                                                 die_offsets);
569     } else {
570       // We don't only want the one true definition, so try and see what we can
571       // find, and only return class or struct DIEs. If we do have the full
572       // implementation, then return it alone, else return all possible
573       // matches.
574       const bool return_implementation_only_if_available = true;
575       DWARFMappedHash::ExtractClassOrStructDIEArray(
576           die_info_array, return_implementation_only_if_available, die_offsets);
577     }
578   }
579   return die_offsets.size();
580 }
581 
582 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
583                                                 DIEInfoArray &die_info_array) {
584   if (name.empty())
585     return 0;
586 
587   Pair kv_pair;
588   size_t old_size = die_info_array.size();
589   if (Find(name, kv_pair)) {
590     die_info_array.swap(kv_pair.value);
591     return die_info_array.size() - old_size;
592   }
593   return 0;
594 }
595