1 //===-- DWARFUnit.cpp -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DWARFUnit.h"
10 
11 #include "lldb/Core/Module.h"
12 #include "lldb/Host/StringConvert.h"
13 #include "lldb/Symbol/CompileUnit.h"
14 #include "lldb/Symbol/LineTable.h"
15 #include "lldb/Symbol/ObjectFile.h"
16 #include "lldb/Utility/LLDBAssert.h"
17 #include "lldb/Utility/StreamString.h"
18 #include "lldb/Utility/Timer.h"
19 
20 #include "DWARFDebugAranges.h"
21 #include "DWARFDebugInfo.h"
22 #include "LogChannelDWARF.h"
23 #include "SymbolFileDWARFDebugMap.h"
24 #include "SymbolFileDWARFDwo.h"
25 
26 using namespace lldb;
27 using namespace lldb_private;
28 using namespace std;
29 
30 extern int g_verbose;
31 
32 DWARFUnit::DWARFUnit(SymbolFileDWARF *dwarf)
33     : m_dwarf(dwarf), m_cancel_scopes(false) {}
34 
35 DWARFUnit::~DWARFUnit() {}
36 
37 //----------------------------------------------------------------------
38 // Parses first DIE of a compile unit.
39 //----------------------------------------------------------------------
40 void DWARFUnit::ExtractUnitDIEIfNeeded() {
41   {
42     llvm::sys::ScopedReader lock(m_first_die_mutex);
43     if (m_first_die)
44       return; // Already parsed
45   }
46   llvm::sys::ScopedWriter lock(m_first_die_mutex);
47   if (m_first_die)
48     return; // Already parsed
49 
50   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
51   Timer scoped_timer(
52       func_cat, "%8.8x: DWARFUnit::ExtractUnitDIEIfNeeded()", m_offset);
53 
54   // Set the offset to that of the first DIE and calculate the start of the
55   // next compilation unit header.
56   lldb::offset_t offset = GetFirstDIEOffset();
57 
58   // We are in our compile unit, parse starting at the offset we were told to
59   // parse
60   const DWARFDataExtractor &data = GetData();
61   DWARFFormValue::FixedFormSizes fixed_form_sizes =
62       DWARFFormValue::GetFixedFormSizesForAddressSize(GetAddressByteSize());
63   if (offset < GetNextCompileUnitOffset() &&
64       m_first_die.FastExtract(data, this, fixed_form_sizes, &offset)) {
65     AddUnitDIE(m_first_die);
66     return;
67   }
68 }
69 
70 //----------------------------------------------------------------------
71 // Parses a compile unit and indexes its DIEs if it hasn't already been done.
72 // It will leave this compile unit extracted forever.
73 //----------------------------------------------------------------------
74 void DWARFUnit::ExtractDIEsIfNeeded() {
75   m_cancel_scopes = true;
76 
77   {
78     llvm::sys::ScopedReader lock(m_die_array_mutex);
79     if (!m_die_array.empty())
80       return; // Already parsed
81   }
82   llvm::sys::ScopedWriter lock(m_die_array_mutex);
83   if (!m_die_array.empty())
84     return; // Already parsed
85 
86   ExtractDIEsRWLocked();
87 }
88 
89 //----------------------------------------------------------------------
90 // Parses a compile unit and indexes its DIEs if it hasn't already been done.
91 // It will clear this compile unit after returned instance gets out of scope,
92 // no other ScopedExtractDIEs instance is running for this compile unit
93 // and no ExtractDIEsIfNeeded() has been executed during this ScopedExtractDIEs
94 // lifetime.
95 //----------------------------------------------------------------------
96 DWARFUnit::ScopedExtractDIEs DWARFUnit::ExtractDIEsScoped() {
97   ScopedExtractDIEs scoped(this);
98 
99   {
100     llvm::sys::ScopedReader lock(m_die_array_mutex);
101     if (!m_die_array.empty())
102       return scoped; // Already parsed
103   }
104   llvm::sys::ScopedWriter lock(m_die_array_mutex);
105   if (!m_die_array.empty())
106     return scoped; // Already parsed
107 
108   // Otherwise m_die_array would be already populated.
109   lldbassert(!m_cancel_scopes);
110 
111   ExtractDIEsRWLocked();
112   scoped.m_clear_dies = true;
113   return scoped;
114 }
115 
116 DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(DWARFUnit *cu) : m_cu(cu) {
117   lldbassert(m_cu);
118   m_cu->m_die_array_scoped_mutex.lock_shared();
119 }
120 
121 DWARFUnit::ScopedExtractDIEs::~ScopedExtractDIEs() {
122   if (!m_cu)
123     return;
124   m_cu->m_die_array_scoped_mutex.unlock_shared();
125   if (!m_clear_dies || m_cu->m_cancel_scopes)
126     return;
127   // Be sure no other ScopedExtractDIEs is running anymore.
128   llvm::sys::ScopedWriter lock_scoped(m_cu->m_die_array_scoped_mutex);
129   llvm::sys::ScopedWriter lock(m_cu->m_die_array_mutex);
130   if (m_cu->m_cancel_scopes)
131     return;
132   m_cu->ClearDIEsRWLocked();
133 }
134 
135 DWARFUnit::ScopedExtractDIEs::ScopedExtractDIEs(ScopedExtractDIEs &&rhs)
136     : m_cu(rhs.m_cu), m_clear_dies(rhs.m_clear_dies) {
137   rhs.m_cu = nullptr;
138 }
139 
140 DWARFUnit::ScopedExtractDIEs &DWARFUnit::ScopedExtractDIEs::operator=(
141     DWARFUnit::ScopedExtractDIEs &&rhs) {
142   m_cu = rhs.m_cu;
143   rhs.m_cu = nullptr;
144   m_clear_dies = rhs.m_clear_dies;
145   return *this;
146 }
147 
148 //----------------------------------------------------------------------
149 // Parses a compile unit and indexes its DIEs, m_die_array_mutex must be
150 // held R/W and m_die_array must be empty.
151 //----------------------------------------------------------------------
152 void DWARFUnit::ExtractDIEsRWLocked() {
153   llvm::sys::ScopedWriter first_die_lock(m_first_die_mutex);
154 
155   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
156   Timer scoped_timer(
157       func_cat, "%8.8x: DWARFUnit::ExtractDIEsIfNeeded()", m_offset);
158 
159   // Set the offset to that of the first DIE and calculate the start of the
160   // next compilation unit header.
161   lldb::offset_t offset = GetFirstDIEOffset();
162   lldb::offset_t next_cu_offset = GetNextCompileUnitOffset();
163 
164   DWARFDebugInfoEntry die;
165 
166   uint32_t depth = 0;
167   // We are in our compile unit, parse starting at the offset we were told to
168   // parse
169   const DWARFDataExtractor &data = GetData();
170   std::vector<uint32_t> die_index_stack;
171   die_index_stack.reserve(32);
172   die_index_stack.push_back(0);
173   bool prev_die_had_children = false;
174   DWARFFormValue::FixedFormSizes fixed_form_sizes =
175       DWARFFormValue::GetFixedFormSizesForAddressSize(GetAddressByteSize());
176   while (offset < next_cu_offset &&
177          die.FastExtract(data, this, fixed_form_sizes, &offset)) {
178     const bool null_die = die.IsNULL();
179     if (depth == 0) {
180       assert(m_die_array.empty() && "Compile unit DIE already added");
181 
182       // The average bytes per DIE entry has been seen to be around 14-20 so
183       // lets pre-reserve half of that since we are now stripping the NULL
184       // tags.
185 
186       // Only reserve the memory if we are adding children of the main
187       // compile unit DIE. The compile unit DIE is always the first entry, so
188       // if our size is 1, then we are adding the first compile unit child
189       // DIE and should reserve the memory.
190       m_die_array.reserve(GetDebugInfoSize() / 24);
191       m_die_array.push_back(die);
192 
193       if (!m_first_die)
194         AddUnitDIE(m_die_array.front());
195     } else {
196       if (null_die) {
197         if (prev_die_had_children) {
198           // This will only happen if a DIE says is has children but all it
199           // contains is a NULL tag. Since we are removing the NULL DIEs from
200           // the list (saves up to 25% in C++ code), we need a way to let the
201           // DIE know that it actually doesn't have children.
202           if (!m_die_array.empty())
203             m_die_array.back().SetHasChildren(false);
204         }
205       } else {
206         die.SetParentIndex(m_die_array.size() - die_index_stack[depth - 1]);
207 
208         if (die_index_stack.back())
209           m_die_array[die_index_stack.back()].SetSiblingIndex(
210               m_die_array.size() - die_index_stack.back());
211 
212         // Only push the DIE if it isn't a NULL DIE
213         m_die_array.push_back(die);
214       }
215     }
216 
217     if (null_die) {
218       // NULL DIE.
219       if (!die_index_stack.empty())
220         die_index_stack.pop_back();
221 
222       if (depth > 0)
223         --depth;
224       prev_die_had_children = false;
225     } else {
226       die_index_stack.back() = m_die_array.size() - 1;
227       // Normal DIE
228       const bool die_has_children = die.HasChildren();
229       if (die_has_children) {
230         die_index_stack.push_back(0);
231         ++depth;
232       }
233       prev_die_had_children = die_has_children;
234     }
235 
236     if (depth == 0)
237       break; // We are done with this compile unit!
238   }
239 
240   if (!m_die_array.empty()) {
241     if (m_first_die) {
242       // Only needed for the assertion.
243       m_first_die.SetHasChildren(m_die_array.front().HasChildren());
244       lldbassert(m_first_die == m_die_array.front());
245     }
246     m_first_die = m_die_array.front();
247   }
248 
249   m_die_array.shrink_to_fit();
250 
251   if (m_dwo_symbol_file) {
252     DWARFUnit *dwo_cu = m_dwo_symbol_file->GetCompileUnit();
253     dwo_cu->ExtractDIEsIfNeeded();
254   }
255 }
256 
257 // This is used when a split dwarf is enabled.
258 // A skeleton compilation unit may contain the DW_AT_str_offsets_base attribute
259 // that points to the first string offset of the CU contribution to the
260 // .debug_str_offsets. At the same time, the corresponding split debug unit also
261 // may use DW_FORM_strx* forms pointing to its own .debug_str_offsets.dwo and
262 // for that case, we should find the offset (skip the section header).
263 static void SetDwoStrOffsetsBase(DWARFUnit *dwo_cu) {
264   lldb::offset_t baseOffset = 0;
265 
266   const DWARFDataExtractor &strOffsets =
267       dwo_cu->GetSymbolFileDWARF()->get_debug_str_offsets_data();
268   uint64_t length = strOffsets.GetU32(&baseOffset);
269   if (length == 0xffffffff)
270     length = strOffsets.GetU64(&baseOffset);
271 
272   // Check version.
273   if (strOffsets.GetU16(&baseOffset) < 5)
274     return;
275 
276   // Skip padding.
277   baseOffset += 2;
278 
279   dwo_cu->SetStrOffsetsBase(baseOffset);
280 }
281 
282 // m_die_array_mutex must be already held as read/write.
283 void DWARFUnit::AddUnitDIE(const DWARFDebugInfoEntry &cu_die) {
284   dw_addr_t addr_base = cu_die.GetAttributeValueAsUnsigned(
285       m_dwarf, this, DW_AT_addr_base, LLDB_INVALID_ADDRESS);
286   if (addr_base != LLDB_INVALID_ADDRESS)
287     SetAddrBase(addr_base);
288 
289   dw_addr_t ranges_base = cu_die.GetAttributeValueAsUnsigned(
290       m_dwarf, this, DW_AT_rnglists_base, LLDB_INVALID_ADDRESS);
291   if (ranges_base != LLDB_INVALID_ADDRESS)
292     SetRangesBase(ranges_base);
293 
294   SetStrOffsetsBase(cu_die.GetAttributeValueAsUnsigned(
295       m_dwarf, this, DW_AT_str_offsets_base, 0));
296 
297   uint64_t base_addr = cu_die.GetAttributeValueAsAddress(
298       m_dwarf, this, DW_AT_low_pc, LLDB_INVALID_ADDRESS);
299   if (base_addr == LLDB_INVALID_ADDRESS)
300     base_addr = cu_die.GetAttributeValueAsAddress(
301         m_dwarf, this, DW_AT_entry_pc, 0);
302   SetBaseAddress(base_addr);
303 
304   std::unique_ptr<SymbolFileDWARFDwo> dwo_symbol_file =
305       m_dwarf->GetDwoSymbolFileForCompileUnit(*this, cu_die);
306   if (!dwo_symbol_file)
307     return;
308 
309   DWARFUnit *dwo_cu = dwo_symbol_file->GetCompileUnit();
310   if (!dwo_cu)
311     return; // Can't fetch the compile unit from the dwo file.
312 
313   DWARFBaseDIE dwo_cu_die = dwo_cu->GetUnitDIEOnly();
314   if (!dwo_cu_die.IsValid())
315     return; // Can't fetch the compile unit DIE from the dwo file.
316 
317   uint64_t main_dwo_id =
318       cu_die.GetAttributeValueAsUnsigned(m_dwarf, this, DW_AT_GNU_dwo_id, 0);
319   uint64_t sub_dwo_id =
320       dwo_cu_die.GetAttributeValueAsUnsigned(DW_AT_GNU_dwo_id, 0);
321   if (main_dwo_id != sub_dwo_id)
322     return; // The 2 dwo ID isn't match. Don't use the dwo file as it belongs to
323   // a differectn compilation.
324 
325   m_dwo_symbol_file = std::move(dwo_symbol_file);
326 
327   // Here for DWO CU we want to use the address base set in the skeleton unit
328   // (DW_AT_addr_base) if it is available and use the DW_AT_GNU_addr_base
329   // otherwise. We do that because pre-DWARF v5 could use the DW_AT_GNU_*
330   // attributes which were applicable to the DWO units. The corresponding
331   // DW_AT_* attributes standardized in DWARF v5 are also applicable to the main
332   // unit in contrast.
333   if (addr_base == LLDB_INVALID_ADDRESS)
334     addr_base = cu_die.GetAttributeValueAsUnsigned(m_dwarf, this,
335                                                    DW_AT_GNU_addr_base, 0);
336   dwo_cu->SetAddrBase(addr_base);
337 
338   if (ranges_base == LLDB_INVALID_ADDRESS)
339     ranges_base = cu_die.GetAttributeValueAsUnsigned(m_dwarf, this,
340                                                      DW_AT_GNU_ranges_base, 0);
341   dwo_cu->SetRangesBase(ranges_base);
342 
343   dwo_cu->SetBaseObjOffset(m_offset);
344 
345   SetDwoStrOffsetsBase(dwo_cu);
346 }
347 
348 DWARFDIE DWARFUnit::LookupAddress(const dw_addr_t address) {
349   if (DIE()) {
350     const DWARFDebugAranges &func_aranges = GetFunctionAranges();
351 
352     // Re-check the aranges auto pointer contents in case it was created above
353     if (!func_aranges.IsEmpty())
354       return GetDIE(func_aranges.FindAddress(address));
355   }
356   return DWARFDIE();
357 }
358 
359 size_t DWARFUnit::AppendDIEsWithTag(const dw_tag_t tag,
360                                     std::vector<DWARFDIE> &dies,
361                                     uint32_t depth) const {
362   size_t old_size = dies.size();
363   {
364     llvm::sys::ScopedReader lock(m_die_array_mutex);
365     DWARFDebugInfoEntry::const_iterator pos;
366     DWARFDebugInfoEntry::const_iterator end = m_die_array.end();
367     for (pos = m_die_array.begin(); pos != end; ++pos) {
368       if (pos->Tag() == tag)
369         dies.emplace_back(this, &(*pos));
370     }
371   }
372 
373   // Return the number of DIEs added to the collection
374   return dies.size() - old_size;
375 }
376 
377 lldb::user_id_t DWARFUnit::GetID() const {
378   dw_offset_t local_id =
379       m_base_obj_offset != DW_INVALID_OFFSET ? m_base_obj_offset : m_offset;
380   if (m_dwarf)
381     return DIERef(local_id, local_id).GetUID(m_dwarf);
382   else
383     return local_id;
384 }
385 
386 dw_offset_t DWARFUnit::GetNextCompileUnitOffset() const {
387   return m_offset + GetLengthByteSize() + GetLength();
388 }
389 
390 size_t DWARFUnit::GetDebugInfoSize() const {
391   return GetLengthByteSize() + GetLength() - GetHeaderByteSize();
392 }
393 
394 const DWARFAbbreviationDeclarationSet *DWARFUnit::GetAbbreviations() const {
395   return m_abbrevs;
396 }
397 
398 dw_offset_t DWARFUnit::GetAbbrevOffset() const {
399   return m_abbrevs ? m_abbrevs->GetOffset() : DW_INVALID_OFFSET;
400 }
401 
402 void DWARFUnit::SetAddrBase(dw_addr_t addr_base) { m_addr_base = addr_base; }
403 
404 void DWARFUnit::SetRangesBase(dw_addr_t ranges_base) {
405   m_ranges_base = ranges_base;
406 }
407 
408 void DWARFUnit::SetBaseObjOffset(dw_offset_t base_obj_offset) {
409   m_base_obj_offset = base_obj_offset;
410 }
411 
412 void DWARFUnit::SetStrOffsetsBase(dw_offset_t str_offsets_base) {
413   m_str_offsets_base = str_offsets_base;
414 }
415 
416 // It may be called only with m_die_array_mutex held R/W.
417 void DWARFUnit::ClearDIEsRWLocked() {
418   m_die_array.clear();
419   m_die_array.shrink_to_fit();
420 
421   if (m_dwo_symbol_file)
422     m_dwo_symbol_file->GetCompileUnit()->ClearDIEsRWLocked();
423 }
424 
425 void DWARFUnit::BuildAddressRangeTable(SymbolFileDWARF *dwarf,
426                                        DWARFDebugAranges *debug_aranges) {
427   // This function is usually called if there in no .debug_aranges section in
428   // order to produce a compile unit level set of address ranges that is
429   // accurate.
430 
431   size_t num_debug_aranges = debug_aranges->GetNumRanges();
432 
433   // First get the compile unit DIE only and check if it has a DW_AT_ranges
434   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
435 
436   const dw_offset_t cu_offset = GetOffset();
437   if (die) {
438     DWARFRangeList ranges;
439     const size_t num_ranges =
440         die->GetAttributeAddressRanges(dwarf, this, ranges, false);
441     if (num_ranges > 0) {
442       // This compile unit has DW_AT_ranges, assume this is correct if it is
443       // present since clang no longer makes .debug_aranges by default and it
444       // emits DW_AT_ranges for DW_TAG_compile_units. GCC also does this with
445       // recent GCC builds.
446       for (size_t i = 0; i < num_ranges; ++i) {
447         const DWARFRangeList::Entry &range = ranges.GetEntryRef(i);
448         debug_aranges->AppendRange(cu_offset, range.GetRangeBase(),
449                                    range.GetRangeEnd());
450       }
451 
452       return; // We got all of our ranges from the DW_AT_ranges attribute
453     }
454   }
455   // We don't have a DW_AT_ranges attribute, so we need to parse the DWARF
456 
457   // If the DIEs weren't parsed, then we don't want all dies for all compile
458   // units to stay loaded when they weren't needed. So we can end up parsing
459   // the DWARF and then throwing them all away to keep memory usage down.
460   ScopedExtractDIEs clear_dies(ExtractDIEsScoped());
461 
462   die = DIEPtr();
463   if (die)
464     die->BuildAddressRangeTable(dwarf, this, debug_aranges);
465 
466   if (debug_aranges->GetNumRanges() == num_debug_aranges) {
467     // We got nothing from the functions, maybe we have a line tables only
468     // situation. Check the line tables and build the arange table from this.
469     SymbolContext sc;
470     sc.comp_unit = dwarf->GetCompUnitForDWARFCompUnit(this);
471     if (sc.comp_unit) {
472       SymbolFileDWARFDebugMap *debug_map_sym_file =
473           m_dwarf->GetDebugMapSymfile();
474       if (debug_map_sym_file == NULL) {
475         LineTable *line_table = sc.comp_unit->GetLineTable();
476 
477         if (line_table) {
478           LineTable::FileAddressRanges file_ranges;
479           const bool append = true;
480           const size_t num_ranges =
481               line_table->GetContiguousFileAddressRanges(file_ranges, append);
482           for (uint32_t idx = 0; idx < num_ranges; ++idx) {
483             const LineTable::FileAddressRanges::Entry &range =
484                 file_ranges.GetEntryRef(idx);
485             debug_aranges->AppendRange(cu_offset, range.GetRangeBase(),
486                                        range.GetRangeEnd());
487           }
488         }
489       } else
490         debug_map_sym_file->AddOSOARanges(dwarf, debug_aranges);
491     }
492   }
493 
494   if (debug_aranges->GetNumRanges() == num_debug_aranges) {
495     // We got nothing from the functions, maybe we have a line tables only
496     // situation. Check the line tables and build the arange table from this.
497     SymbolContext sc;
498     sc.comp_unit = dwarf->GetCompUnitForDWARFCompUnit(this);
499     if (sc.comp_unit) {
500       LineTable *line_table = sc.comp_unit->GetLineTable();
501 
502       if (line_table) {
503         LineTable::FileAddressRanges file_ranges;
504         const bool append = true;
505         const size_t num_ranges =
506             line_table->GetContiguousFileAddressRanges(file_ranges, append);
507         for (uint32_t idx = 0; idx < num_ranges; ++idx) {
508           const LineTable::FileAddressRanges::Entry &range =
509               file_ranges.GetEntryRef(idx);
510           debug_aranges->AppendRange(GetOffset(), range.GetRangeBase(),
511                                      range.GetRangeEnd());
512         }
513       }
514     }
515   }
516 }
517 
518 lldb::ByteOrder DWARFUnit::GetByteOrder() const {
519   return m_dwarf->GetObjectFile()->GetByteOrder();
520 }
521 
522 TypeSystem *DWARFUnit::GetTypeSystem() {
523   if (m_dwarf)
524     return m_dwarf->GetTypeSystemForLanguage(GetLanguageType());
525   else
526     return nullptr;
527 }
528 
529 DWARFFormValue::FixedFormSizes DWARFUnit::GetFixedFormSizes() {
530   return DWARFFormValue::GetFixedFormSizesForAddressSize(GetAddressByteSize());
531 }
532 
533 void DWARFUnit::SetBaseAddress(dw_addr_t base_addr) { m_base_addr = base_addr; }
534 
535 //----------------------------------------------------------------------
536 // Compare function DWARFDebugAranges::Range structures
537 //----------------------------------------------------------------------
538 static bool CompareDIEOffset(const DWARFDebugInfoEntry &die,
539                              const dw_offset_t die_offset) {
540   return die.GetOffset() < die_offset;
541 }
542 
543 //----------------------------------------------------------------------
544 // GetDIE()
545 //
546 // Get the DIE (Debug Information Entry) with the specified offset by first
547 // checking if the DIE is contained within this compile unit and grabbing the
548 // DIE from this compile unit. Otherwise we grab the DIE from the DWARF file.
549 //----------------------------------------------------------------------
550 DWARFDIE
551 DWARFUnit::GetDIE(dw_offset_t die_offset) {
552   if (die_offset != DW_INVALID_OFFSET) {
553     if (GetDwoSymbolFile())
554       return GetDwoSymbolFile()->GetCompileUnit()->GetDIE(die_offset);
555 
556     if (ContainsDIEOffset(die_offset)) {
557       ExtractDIEsIfNeeded();
558       DWARFDebugInfoEntry::const_iterator end = m_die_array.cend();
559       DWARFDebugInfoEntry::const_iterator pos =
560           lower_bound(m_die_array.cbegin(), end, die_offset, CompareDIEOffset);
561       if (pos != end) {
562         if (die_offset == (*pos).GetOffset())
563           return DWARFDIE(this, &(*pos));
564       }
565     } else {
566       // Don't specify the compile unit offset as we don't know it because the
567       // DIE belongs to
568       // a different compile unit in the same symbol file.
569       return m_dwarf->DebugInfo()->GetDIEForDIEOffset(die_offset);
570     }
571   }
572   return DWARFDIE(); // Not found
573 }
574 
575 uint8_t DWARFUnit::GetAddressByteSize(const DWARFUnit *cu) {
576   if (cu)
577     return cu->GetAddressByteSize();
578   return DWARFUnit::GetDefaultAddressSize();
579 }
580 
581 uint8_t DWARFUnit::GetDefaultAddressSize() { return 4; }
582 
583 void *DWARFUnit::GetUserData() const { return m_user_data; }
584 
585 void DWARFUnit::SetUserData(void *d) {
586   m_user_data = d;
587   if (m_dwo_symbol_file)
588     m_dwo_symbol_file->GetCompileUnit()->SetUserData(d);
589 }
590 
591 bool DWARFUnit::Supports_DW_AT_APPLE_objc_complete_type() {
592   return GetProducer() != eProducerLLVMGCC;
593 }
594 
595 bool DWARFUnit::DW_AT_decl_file_attributes_are_invalid() {
596   // llvm-gcc makes completely invalid decl file attributes and won't ever be
597   // fixed, so we need to know to ignore these.
598   return GetProducer() == eProducerLLVMGCC;
599 }
600 
601 bool DWARFUnit::Supports_unnamed_objc_bitfields() {
602   if (GetProducer() == eProducerClang) {
603     const uint32_t major_version = GetProducerVersionMajor();
604     return major_version > 425 ||
605            (major_version == 425 && GetProducerVersionUpdate() >= 13);
606   }
607   return true; // Assume all other compilers didn't have incorrect ObjC bitfield
608                // info
609 }
610 
611 SymbolFileDWARF *DWARFUnit::GetSymbolFileDWARF() const { return m_dwarf; }
612 
613 void DWARFUnit::ParseProducerInfo() {
614   m_producer_version_major = UINT32_MAX;
615   m_producer_version_minor = UINT32_MAX;
616   m_producer_version_update = UINT32_MAX;
617 
618   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
619   if (die) {
620 
621     const char *producer_cstr =
622         die->GetAttributeValueAsString(m_dwarf, this, DW_AT_producer, NULL);
623     if (producer_cstr) {
624       RegularExpression llvm_gcc_regex(
625           llvm::StringRef("^4\\.[012]\\.[01] \\(Based on Apple "
626                           "Inc\\. build [0-9]+\\) \\(LLVM build "
627                           "[\\.0-9]+\\)$"));
628       if (llvm_gcc_regex.Execute(llvm::StringRef(producer_cstr))) {
629         m_producer = eProducerLLVMGCC;
630       } else if (strstr(producer_cstr, "clang")) {
631         static RegularExpression g_clang_version_regex(
632             llvm::StringRef("clang-([0-9]+)\\.([0-9]+)\\.([0-9]+)"));
633         RegularExpression::Match regex_match(3);
634         if (g_clang_version_regex.Execute(llvm::StringRef(producer_cstr),
635                                           &regex_match)) {
636           std::string str;
637           if (regex_match.GetMatchAtIndex(producer_cstr, 1, str))
638             m_producer_version_major =
639                 StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
640           if (regex_match.GetMatchAtIndex(producer_cstr, 2, str))
641             m_producer_version_minor =
642                 StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
643           if (regex_match.GetMatchAtIndex(producer_cstr, 3, str))
644             m_producer_version_update =
645                 StringConvert::ToUInt32(str.c_str(), UINT32_MAX, 10);
646         }
647         m_producer = eProducerClang;
648       } else if (strstr(producer_cstr, "GNU"))
649         m_producer = eProducerGCC;
650     }
651   }
652   if (m_producer == eProducerInvalid)
653     m_producer = eProcucerOther;
654 }
655 
656 DWARFProducer DWARFUnit::GetProducer() {
657   if (m_producer == eProducerInvalid)
658     ParseProducerInfo();
659   return m_producer;
660 }
661 
662 uint32_t DWARFUnit::GetProducerVersionMajor() {
663   if (m_producer_version_major == 0)
664     ParseProducerInfo();
665   return m_producer_version_major;
666 }
667 
668 uint32_t DWARFUnit::GetProducerVersionMinor() {
669   if (m_producer_version_minor == 0)
670     ParseProducerInfo();
671   return m_producer_version_minor;
672 }
673 
674 uint32_t DWARFUnit::GetProducerVersionUpdate() {
675   if (m_producer_version_update == 0)
676     ParseProducerInfo();
677   return m_producer_version_update;
678 }
679 LanguageType DWARFUnit::LanguageTypeFromDWARF(uint64_t val) {
680   // Note: user languages between lo_user and hi_user must be handled
681   // explicitly here.
682   switch (val) {
683   case DW_LANG_Mips_Assembler:
684     return eLanguageTypeMipsAssembler;
685   case DW_LANG_GOOGLE_RenderScript:
686     return eLanguageTypeExtRenderScript;
687   default:
688     return static_cast<LanguageType>(val);
689   }
690 }
691 
692 LanguageType DWARFUnit::GetLanguageType() {
693   if (m_language_type != eLanguageTypeUnknown)
694     return m_language_type;
695 
696   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
697   if (die)
698     m_language_type = LanguageTypeFromDWARF(
699         die->GetAttributeValueAsUnsigned(m_dwarf, this, DW_AT_language, 0));
700   return m_language_type;
701 }
702 
703 bool DWARFUnit::GetIsOptimized() {
704   if (m_is_optimized == eLazyBoolCalculate) {
705     const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
706     if (die) {
707       m_is_optimized = eLazyBoolNo;
708       if (die->GetAttributeValueAsUnsigned(m_dwarf, this, DW_AT_APPLE_optimized,
709                                            0) == 1) {
710         m_is_optimized = eLazyBoolYes;
711       }
712     }
713   }
714   return m_is_optimized == eLazyBoolYes;
715 }
716 
717 FileSpec::Style DWARFUnit::GetPathStyle() {
718   if (!m_comp_dir)
719     ComputeCompDirAndGuessPathStyle();
720   return m_comp_dir->GetPathStyle();
721 }
722 
723 const FileSpec &DWARFUnit::GetCompilationDirectory() {
724   if (!m_comp_dir)
725     ComputeCompDirAndGuessPathStyle();
726   return *m_comp_dir;
727 }
728 
729 // DWARF2/3 suggests the form hostname:pathname for compilation directory.
730 // Remove the host part if present.
731 static llvm::StringRef
732 removeHostnameFromPathname(llvm::StringRef path_from_dwarf) {
733   llvm::StringRef host, path;
734   std::tie(host, path) = path_from_dwarf.split(':');
735 
736   if (host.contains('/'))
737     return path_from_dwarf;
738 
739   // check whether we have a windows path, and so the first character is a
740   // drive-letter not a hostname.
741   if (host.size() == 1 && llvm::isAlpha(host[0]) && path.startswith("\\"))
742     return path_from_dwarf;
743 
744   return path;
745 }
746 
747 static FileSpec resolveCompDir(const FileSpec &path) {
748   bool is_symlink = SymbolFileDWARF::GetSymlinkPaths().FindFileIndex(
749                         0, path, /*full*/ true) != UINT32_MAX;
750 
751   if (!is_symlink)
752     return path;
753 
754   namespace fs = llvm::sys::fs;
755   if (fs::get_file_type(path.GetPath(), false) != fs::file_type::symlink_file)
756     return path;
757 
758   FileSpec resolved_symlink;
759   const auto error = FileSystem::Instance().Readlink(path, resolved_symlink);
760   if (error.Success())
761     return resolved_symlink;
762 
763   return path;
764 }
765 
766 void DWARFUnit::ComputeCompDirAndGuessPathStyle() {
767   m_comp_dir = FileSpec();
768   const DWARFDebugInfoEntry *die = GetUnitDIEPtrOnly();
769   if (!die)
770     return;
771 
772   llvm::StringRef comp_dir = removeHostnameFromPathname(
773       die->GetAttributeValueAsString(m_dwarf, this, DW_AT_comp_dir, NULL));
774   if (!comp_dir.empty()) {
775     FileSpec::Style comp_dir_style =
776         FileSpec::GuessPathStyle(comp_dir).getValueOr(FileSpec::Style::native);
777     m_comp_dir = resolveCompDir(FileSpec(comp_dir, comp_dir_style));
778   } else {
779     // Try to detect the style based on the DW_AT_name attribute, but just store
780     // the detected style in the m_comp_dir field.
781     const char *name =
782         die->GetAttributeValueAsString(m_dwarf, this, DW_AT_name, NULL);
783     m_comp_dir = FileSpec(
784         "", FileSpec::GuessPathStyle(name).getValueOr(FileSpec::Style::native));
785   }
786 }
787 
788 SymbolFileDWARFDwo *DWARFUnit::GetDwoSymbolFile() const {
789   return m_dwo_symbol_file.get();
790 }
791 
792 dw_offset_t DWARFUnit::GetBaseObjOffset() const { return m_base_obj_offset; }
793 
794 const DWARFDebugAranges &DWARFUnit::GetFunctionAranges() {
795   if (m_func_aranges_up == NULL) {
796     m_func_aranges_up.reset(new DWARFDebugAranges());
797     const DWARFDebugInfoEntry *die = DIEPtr();
798     if (die)
799       die->BuildFunctionAddressRangeTable(m_dwarf, this,
800                                           m_func_aranges_up.get());
801 
802     if (m_dwo_symbol_file) {
803       DWARFUnit *dwo_cu = m_dwo_symbol_file->GetCompileUnit();
804       const DWARFDebugInfoEntry *dwo_die = dwo_cu->DIEPtr();
805       if (dwo_die)
806         dwo_die->BuildFunctionAddressRangeTable(m_dwo_symbol_file.get(), dwo_cu,
807                                                 m_func_aranges_up.get());
808     }
809 
810     const bool minimize = false;
811     m_func_aranges_up->Sort(minimize);
812   }
813   return *m_func_aranges_up;
814 }
815 
816