1 //===-- HashedNameToDIE.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "HashedNameToDIE.h"
10 #include "llvm/ADT/StringRef.h"
11 
12 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
13                                       DIEArray &die_offsets) {
14   const size_t count = die_info_array.size();
15   for (size_t i = 0; i < count; ++i)
16     die_offsets.emplace_back(die_info_array[i]);
17 }
18 
19 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
20                                       const dw_tag_t tag,
21                                       DIEArray &die_offsets) {
22   if (tag == 0) {
23     ExtractDIEArray(die_info_array, die_offsets);
24   } else {
25     const size_t count = die_info_array.size();
26     for (size_t i = 0; i < count; ++i) {
27       const dw_tag_t die_tag = die_info_array[i].tag;
28       bool tag_matches = die_tag == 0 || tag == die_tag;
29       if (!tag_matches) {
30         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
31           tag_matches =
32               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
33       }
34       if (tag_matches)
35         die_offsets.emplace_back(die_info_array[i]);
36     }
37   }
38 }
39 
40 void DWARFMappedHash::ExtractDIEArray(const DIEInfoArray &die_info_array,
41                                       const dw_tag_t tag,
42                                       const uint32_t qualified_name_hash,
43                                       DIEArray &die_offsets) {
44   if (tag == 0) {
45     ExtractDIEArray(die_info_array, die_offsets);
46   } else {
47     const size_t count = die_info_array.size();
48     for (size_t i = 0; i < count; ++i) {
49       if (qualified_name_hash != die_info_array[i].qualified_name_hash)
50         continue;
51       const dw_tag_t die_tag = die_info_array[i].tag;
52       bool tag_matches = die_tag == 0 || tag == die_tag;
53       if (!tag_matches) {
54         if (die_tag == DW_TAG_class_type || die_tag == DW_TAG_structure_type)
55           tag_matches =
56               tag == DW_TAG_structure_type || tag == DW_TAG_class_type;
57       }
58       if (tag_matches)
59         die_offsets.emplace_back(die_info_array[i]);
60     }
61   }
62 }
63 
64 void DWARFMappedHash::ExtractClassOrStructDIEArray(
65     const DIEInfoArray &die_info_array,
66     bool return_implementation_only_if_available, DIEArray &die_offsets) {
67   const size_t count = die_info_array.size();
68   for (size_t i = 0; i < count; ++i) {
69     const dw_tag_t die_tag = die_info_array[i].tag;
70     if (die_tag == 0 || die_tag == DW_TAG_class_type ||
71         die_tag == DW_TAG_structure_type) {
72       if (die_info_array[i].type_flags & eTypeFlagClassIsImplementation) {
73         if (return_implementation_only_if_available) {
74           // We found the one true definition for this class, so only return
75           // that
76           die_offsets.clear();
77           die_offsets.emplace_back(die_info_array[i]);
78           return;
79         } else {
80           // Put the one true definition as the first entry so it matches first
81           die_offsets.emplace(die_offsets.begin(), die_info_array[i].cu_offset,
82                               die_info_array[i].offset);
83         }
84       } else {
85         die_offsets.emplace_back(die_info_array[i]);
86       }
87     }
88   }
89 }
90 
91 void DWARFMappedHash::ExtractTypesFromDIEArray(
92     const DIEInfoArray &die_info_array, uint32_t type_flag_mask,
93     uint32_t type_flag_value, DIEArray &die_offsets) {
94   const size_t count = die_info_array.size();
95   for (size_t i = 0; i < count; ++i) {
96     if ((die_info_array[i].type_flags & type_flag_mask) == type_flag_value)
97       die_offsets.emplace_back(die_info_array[i]);
98   }
99 }
100 
101 const char *DWARFMappedHash::GetAtomTypeName(uint16_t atom) {
102   switch (atom) {
103   case eAtomTypeNULL:
104     return "NULL";
105   case eAtomTypeDIEOffset:
106     return "die-offset";
107   case eAtomTypeCUOffset:
108     return "cu-offset";
109   case eAtomTypeTag:
110     return "die-tag";
111   case eAtomTypeNameFlags:
112     return "name-flags";
113   case eAtomTypeTypeFlags:
114     return "type-flags";
115   case eAtomTypeQualNameHash:
116     return "qualified-name-hash";
117   }
118   return "<invalid>";
119 }
120 
121 DWARFMappedHash::DIEInfo::DIEInfo()
122     : cu_offset(DW_INVALID_OFFSET), offset(DW_INVALID_OFFSET), tag(0),
123       type_flags(0), qualified_name_hash(0) {}
124 
125 DWARFMappedHash::DIEInfo::DIEInfo(dw_offset_t c, dw_offset_t o, dw_tag_t t,
126                                   uint32_t f, uint32_t h)
127     : cu_offset(c), offset(o), tag(t), type_flags(f), qualified_name_hash(h) {}
128 
129 DWARFMappedHash::Prologue::Prologue(dw_offset_t _die_base_offset)
130     : die_base_offset(_die_base_offset), atoms(), atom_mask(0),
131       min_hash_data_byte_size(0), hash_data_has_fixed_byte_size(true) {
132   // Define an array of DIE offsets by first defining an array, and then define
133   // the atom type for the array, in this case we have an array of DIE offsets
134   AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
135 }
136 
137 void DWARFMappedHash::Prologue::ClearAtoms() {
138   hash_data_has_fixed_byte_size = true;
139   min_hash_data_byte_size = 0;
140   atom_mask = 0;
141   atoms.clear();
142 }
143 
144 bool DWARFMappedHash::Prologue::ContainsAtom(AtomType atom_type) const {
145   return (atom_mask & (1u << atom_type)) != 0;
146 }
147 
148 void DWARFMappedHash::Prologue::Clear() {
149   die_base_offset = 0;
150   ClearAtoms();
151 }
152 
153 void DWARFMappedHash::Prologue::AppendAtom(AtomType type, dw_form_t form) {
154   atoms.push_back({type, form});
155   atom_mask |= 1u << type;
156   switch (form) {
157   case DW_FORM_indirect:
158   case DW_FORM_exprloc:
159   case DW_FORM_flag_present:
160   case DW_FORM_ref_sig8:
161     llvm_unreachable("Unhandled atom form");
162 
163   case DW_FORM_addrx:
164   case DW_FORM_string:
165   case DW_FORM_block:
166   case DW_FORM_block1:
167   case DW_FORM_sdata:
168   case DW_FORM_udata:
169   case DW_FORM_ref_udata:
170   case DW_FORM_GNU_addr_index:
171   case DW_FORM_GNU_str_index:
172     hash_data_has_fixed_byte_size = false;
173     LLVM_FALLTHROUGH;
174   case DW_FORM_flag:
175   case DW_FORM_data1:
176   case DW_FORM_ref1:
177   case DW_FORM_sec_offset:
178     min_hash_data_byte_size += 1;
179     break;
180 
181   case DW_FORM_block2:
182     hash_data_has_fixed_byte_size = false;
183     LLVM_FALLTHROUGH;
184   case DW_FORM_data2:
185   case DW_FORM_ref2:
186     min_hash_data_byte_size += 2;
187     break;
188 
189   case DW_FORM_block4:
190     hash_data_has_fixed_byte_size = false;
191     LLVM_FALLTHROUGH;
192   case DW_FORM_data4:
193   case DW_FORM_ref4:
194   case DW_FORM_addr:
195   case DW_FORM_ref_addr:
196   case DW_FORM_strp:
197     min_hash_data_byte_size += 4;
198     break;
199 
200   case DW_FORM_data8:
201   case DW_FORM_ref8:
202     min_hash_data_byte_size += 8;
203     break;
204   }
205 }
206 
207 lldb::offset_t
208 DWARFMappedHash::Prologue::Read(const lldb_private::DataExtractor &data,
209                                 lldb::offset_t offset) {
210   ClearAtoms();
211 
212   die_base_offset = data.GetU32(&offset);
213 
214   const uint32_t atom_count = data.GetU32(&offset);
215   if (atom_count == 0x00060003u) {
216     // Old format, deal with contents of old pre-release format
217     while (data.GetU32(&offset))
218       /* do nothing */;
219 
220     // Hardcode to the only known value for now.
221     AppendAtom(eAtomTypeDIEOffset, DW_FORM_data4);
222   } else {
223     for (uint32_t i = 0; i < atom_count; ++i) {
224       AtomType type = (AtomType)data.GetU16(&offset);
225       dw_form_t form = (dw_form_t)data.GetU16(&offset);
226       AppendAtom(type, form);
227     }
228   }
229   return offset;
230 }
231 
232 size_t DWARFMappedHash::Prologue::GetByteSize() const {
233   // Add an extra count to the atoms size for the zero termination Atom that
234   // gets written to disk
235   return sizeof(die_base_offset) + sizeof(uint32_t) +
236          atoms.size() * sizeof(Atom);
237 }
238 
239 size_t DWARFMappedHash::Prologue::GetMinimumHashDataByteSize() const {
240   return min_hash_data_byte_size;
241 }
242 
243 bool DWARFMappedHash::Prologue::HashDataHasFixedByteSize() const {
244   return hash_data_has_fixed_byte_size;
245 }
246 
247 size_t DWARFMappedHash::Header::GetByteSize(const HeaderData &header_data) {
248   return header_data.GetByteSize();
249 }
250 
251 lldb::offset_t DWARFMappedHash::Header::Read(lldb_private::DataExtractor &data,
252                                              lldb::offset_t offset) {
253   offset = MappedHash::Header<Prologue>::Read(data, offset);
254   if (offset != UINT32_MAX) {
255     offset = header_data.Read(data, offset);
256   }
257   return offset;
258 }
259 
260 bool DWARFMappedHash::Header::Read(const lldb_private::DWARFDataExtractor &data,
261                                    lldb::offset_t *offset_ptr,
262                                    DIEInfo &hash_data) const {
263   const size_t num_atoms = header_data.atoms.size();
264   if (num_atoms == 0)
265     return false;
266 
267   for (size_t i = 0; i < num_atoms; ++i) {
268     DWARFFormValue form_value(NULL, header_data.atoms[i].form);
269 
270     if (!form_value.ExtractValue(data, offset_ptr))
271       return false;
272 
273     switch (header_data.atoms[i].type) {
274     case eAtomTypeDIEOffset: // DIE offset, check form for encoding
275       hash_data.offset =
276           DWARFFormValue::IsDataForm(form_value.Form())
277               ? form_value.Unsigned()
278               : form_value.Reference(header_data.die_base_offset);
279       break;
280 
281     case eAtomTypeTag: // DW_TAG value for the DIE
282       hash_data.tag = (dw_tag_t)form_value.Unsigned();
283       break;
284 
285     case eAtomTypeTypeFlags: // Flags from enum TypeFlags
286       hash_data.type_flags = (uint32_t)form_value.Unsigned();
287       break;
288 
289     case eAtomTypeQualNameHash: // Flags from enum TypeFlags
290       hash_data.qualified_name_hash = form_value.Unsigned();
291       break;
292 
293     default:
294       // We can always skip atoms we don't know about
295       break;
296     }
297   }
298   return true;
299 }
300 
301 DWARFMappedHash::MemoryTable::MemoryTable(
302     lldb_private::DWARFDataExtractor &table_data,
303     const lldb_private::DWARFDataExtractor &string_table, const char *name)
304     : MappedHash::MemoryTable<uint32_t, Header, DIEInfoArray>(table_data),
305       m_data(table_data), m_string_table(string_table), m_name(name) {}
306 
307 const char *
308 DWARFMappedHash::MemoryTable::GetStringForKeyType(KeyType key) const {
309   // The key in the DWARF table is the .debug_str offset for the string
310   return m_string_table.PeekCStr(key);
311 }
312 
313 bool DWARFMappedHash::MemoryTable::ReadHashData(uint32_t hash_data_offset,
314                                                 HashData &hash_data) const {
315   lldb::offset_t offset = hash_data_offset;
316   offset += 4; // Skip string table offset that contains offset of hash name in
317                // .debug_str
318   const uint32_t count = m_data.GetU32(&offset);
319   if (count > 0) {
320     hash_data.resize(count);
321     for (uint32_t i = 0; i < count; ++i) {
322       if (!m_header.Read(m_data, &offset, hash_data[i]))
323         return false;
324     }
325   } else
326     hash_data.clear();
327   return true;
328 }
329 
330 DWARFMappedHash::MemoryTable::Result
331 DWARFMappedHash::MemoryTable::GetHashDataForName(
332     llvm::StringRef name, lldb::offset_t *hash_data_offset_ptr,
333     Pair &pair) const {
334   pair.key = m_data.GetU32(hash_data_offset_ptr);
335   pair.value.clear();
336 
337   // If the key is zero, this terminates our chain of HashData objects for this
338   // hash value.
339   if (pair.key == 0)
340     return eResultEndOfHashData;
341 
342   // There definitely should be a string for this string offset, if there
343   // isn't, there is something wrong, return and error
344   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
345   if (strp_cstr == NULL) {
346     *hash_data_offset_ptr = UINT32_MAX;
347     return eResultError;
348   }
349 
350   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
351   const size_t min_total_hash_data_size =
352       count * m_header.header_data.GetMinimumHashDataByteSize();
353   if (count > 0 &&
354       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
355                                       min_total_hash_data_size)) {
356     // We have at least one HashData entry, and we have enough data to parse at
357     // least "count" HashData entries.
358 
359     // First make sure the entire C string matches...
360     const bool match = name == strp_cstr;
361 
362     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
363       // If the string doesn't match and we have fixed size data, we can just
364       // add the total byte size of all HashData objects to the hash data
365       // offset and be done...
366       *hash_data_offset_ptr += min_total_hash_data_size;
367     } else {
368       // If the string does match, or we don't have fixed size data then we
369       // need to read the hash data as a stream. If the string matches we also
370       // append all HashData objects to the value array.
371       for (uint32_t i = 0; i < count; ++i) {
372         DIEInfo die_info;
373         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
374           // Only happened if the HashData of the string matched...
375           if (match)
376             pair.value.push_back(die_info);
377         } else {
378           // Something went wrong while reading the data
379           *hash_data_offset_ptr = UINT32_MAX;
380           return eResultError;
381         }
382       }
383     }
384     // Return the correct response depending on if the string matched or not...
385     if (match)
386       return eResultKeyMatch; // The key (cstring) matches and we have lookup
387                               // results!
388     else
389       return eResultKeyMismatch; // The key doesn't match, this function will
390                                  // get called
391     // again for the next key/value or the key terminator which in our case is
392     // a zero .debug_str offset.
393   } else {
394     *hash_data_offset_ptr = UINT32_MAX;
395     return eResultError;
396   }
397 }
398 
399 DWARFMappedHash::MemoryTable::Result
400 DWARFMappedHash::MemoryTable::AppendHashDataForRegularExpression(
401     const lldb_private::RegularExpression &regex,
402     lldb::offset_t *hash_data_offset_ptr, Pair &pair) const {
403   pair.key = m_data.GetU32(hash_data_offset_ptr);
404   // If the key is zero, this terminates our chain of HashData objects for this
405   // hash value.
406   if (pair.key == 0)
407     return eResultEndOfHashData;
408 
409   // There definitely should be a string for this string offset, if there
410   // isn't, there is something wrong, return and error
411   const char *strp_cstr = m_string_table.PeekCStr(pair.key);
412   if (strp_cstr == NULL)
413     return eResultError;
414 
415   const uint32_t count = m_data.GetU32(hash_data_offset_ptr);
416   const size_t min_total_hash_data_size =
417       count * m_header.header_data.GetMinimumHashDataByteSize();
418   if (count > 0 &&
419       m_data.ValidOffsetForDataOfSize(*hash_data_offset_ptr,
420                                       min_total_hash_data_size)) {
421     const bool match = regex.Execute(llvm::StringRef(strp_cstr));
422 
423     if (!match && m_header.header_data.HashDataHasFixedByteSize()) {
424       // If the regex doesn't match and we have fixed size data, we can just
425       // add the total byte size of all HashData objects to the hash data
426       // offset and be done...
427       *hash_data_offset_ptr += min_total_hash_data_size;
428     } else {
429       // If the string does match, or we don't have fixed size data then we
430       // need to read the hash data as a stream. If the string matches we also
431       // append all HashData objects to the value array.
432       for (uint32_t i = 0; i < count; ++i) {
433         DIEInfo die_info;
434         if (m_header.Read(m_data, hash_data_offset_ptr, die_info)) {
435           // Only happened if the HashData of the string matched...
436           if (match)
437             pair.value.push_back(die_info);
438         } else {
439           // Something went wrong while reading the data
440           *hash_data_offset_ptr = UINT32_MAX;
441           return eResultError;
442         }
443       }
444     }
445     // Return the correct response depending on if the string matched or not...
446     if (match)
447       return eResultKeyMatch; // The key (cstring) matches and we have lookup
448                               // results!
449     else
450       return eResultKeyMismatch; // The key doesn't match, this function will
451                                  // get called
452     // again for the next key/value or the key terminator which in our case is
453     // a zero .debug_str offset.
454   } else {
455     *hash_data_offset_ptr = UINT32_MAX;
456     return eResultError;
457   }
458 }
459 
460 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsThatMatchingRegex(
461     const lldb_private::RegularExpression &regex,
462     DIEInfoArray &die_info_array) const {
463   const uint32_t hash_count = m_header.hashes_count;
464   Pair pair;
465   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
466     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
467     while (hash_data_offset != UINT32_MAX) {
468       const lldb::offset_t prev_hash_data_offset = hash_data_offset;
469       Result hash_result =
470           AppendHashDataForRegularExpression(regex, &hash_data_offset, pair);
471       if (prev_hash_data_offset == hash_data_offset)
472         break;
473 
474       // Check the result of getting our hash data
475       switch (hash_result) {
476       case eResultKeyMatch:
477       case eResultKeyMismatch:
478         // Whether we matches or not, it doesn't matter, we keep looking.
479         break;
480 
481       case eResultEndOfHashData:
482       case eResultError:
483         hash_data_offset = UINT32_MAX;
484         break;
485       }
486     }
487   }
488   die_info_array.swap(pair.value);
489   return die_info_array.size();
490 }
491 
492 size_t DWARFMappedHash::MemoryTable::AppendAllDIEsInRange(
493     const uint32_t die_offset_start, const uint32_t die_offset_end,
494     DIEInfoArray &die_info_array) const {
495   const uint32_t hash_count = m_header.hashes_count;
496   for (uint32_t offset_idx = 0; offset_idx < hash_count; ++offset_idx) {
497     bool done = false;
498     lldb::offset_t hash_data_offset = GetHashDataOffset(offset_idx);
499     while (!done && hash_data_offset != UINT32_MAX) {
500       KeyType key = m_data.GetU32(&hash_data_offset);
501       // If the key is zero, this terminates our chain of HashData objects for
502       // this hash value.
503       if (key == 0)
504         break;
505 
506       const uint32_t count = m_data.GetU32(&hash_data_offset);
507       for (uint32_t i = 0; i < count; ++i) {
508         DIEInfo die_info;
509         if (m_header.Read(m_data, &hash_data_offset, die_info)) {
510           if (die_info.offset == 0)
511             done = true;
512           if (die_offset_start <= die_info.offset &&
513               die_info.offset < die_offset_end)
514             die_info_array.push_back(die_info);
515         }
516       }
517     }
518   }
519   return die_info_array.size();
520 }
521 
522 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
523                                                 DIEArray &die_offsets) {
524   if (name.empty())
525     return 0;
526 
527   DIEInfoArray die_info_array;
528   if (FindByName(name, die_info_array))
529     DWARFMappedHash::ExtractDIEArray(die_info_array, die_offsets);
530   return die_info_array.size();
531 }
532 
533 size_t DWARFMappedHash::MemoryTable::FindByNameAndTag(llvm::StringRef name,
534                                                       const dw_tag_t tag,
535                                                       DIEArray &die_offsets) {
536   DIEInfoArray die_info_array;
537   if (FindByName(name, die_info_array))
538     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, die_offsets);
539   return die_info_array.size();
540 }
541 
542 size_t DWARFMappedHash::MemoryTable::FindByNameAndTagAndQualifiedNameHash(
543     llvm::StringRef name, const dw_tag_t tag,
544     const uint32_t qualified_name_hash, DIEArray &die_offsets) {
545   DIEInfoArray die_info_array;
546   if (FindByName(name, die_info_array))
547     DWARFMappedHash::ExtractDIEArray(die_info_array, tag, qualified_name_hash,
548                                      die_offsets);
549   return die_info_array.size();
550 }
551 
552 size_t DWARFMappedHash::MemoryTable::FindCompleteObjCClassByName(
553     llvm::StringRef name, DIEArray &die_offsets, bool must_be_implementation) {
554   DIEInfoArray die_info_array;
555   if (FindByName(name, die_info_array)) {
556     if (must_be_implementation &&
557         GetHeader().header_data.ContainsAtom(eAtomTypeTypeFlags)) {
558       // If we have two atoms, then we have the DIE offset and the type flags
559       // so we can find the objective C class efficiently.
560       DWARFMappedHash::ExtractTypesFromDIEArray(die_info_array, UINT32_MAX,
561                                                 eTypeFlagClassIsImplementation,
562                                                 die_offsets);
563     } else {
564       // We don't only want the one true definition, so try and see what we can
565       // find, and only return class or struct DIEs. If we do have the full
566       // implementation, then return it alone, else return all possible
567       // matches.
568       const bool return_implementation_only_if_available = true;
569       DWARFMappedHash::ExtractClassOrStructDIEArray(
570           die_info_array, return_implementation_only_if_available, die_offsets);
571     }
572   }
573   return die_offsets.size();
574 }
575 
576 size_t DWARFMappedHash::MemoryTable::FindByName(llvm::StringRef name,
577                                                 DIEInfoArray &die_info_array) {
578   if (name.empty())
579     return 0;
580 
581   Pair kv_pair;
582   size_t old_size = die_info_array.size();
583   if (Find(name, kv_pair)) {
584     die_info_array.swap(kv_pair.value);
585     return die_info_array.size() - old_size;
586   }
587   return 0;
588 }
589